jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[cleanup] Standardize `import datetime as dt` (#8978)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import hashlib
8 import itertools
9 import json
10 import math
11 import os.path
12 import random
13 import re
14 import shlex
15 import sys
16 import threading
17 import time
18 import traceback
19 import urllib.parse
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from .openload import PhantomJSwrapper
23 from ..compat import functools
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 traverse_obj,
59 try_call,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unified_timestamp,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
69 )
70
# Marker key name used by yt-dlp for client bookkeeping.
# NOTE(review): its consumers are outside this part of the file — confirm usage there.
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

# Innertube client configurations, keyed by yt-dlp client name.
# Keys that an entry may carry:
#   INNERTUBE_API_KEY             - API key; build_innertube_clients() fills in a default when absent
#   INNERTUBE_HOST                - API host; defaults to 'www.youtube.com' when absent
#   INNERTUBE_CONTEXT             - request context; its 'client' dict holds clientName, clientVersion, ...
#   INNERTUBE_CONTEXT_CLIENT_NAME - numeric client id (sent as the X-YouTube-Client-Name header)
#   REQUIRE_JS_PLAYER             - defaults to True when absent
# build_innertube_clients() additionally sets 'priority' on every entry and adds
# '{client}_embedscreen' copies for clients that have no variant suffix.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '6.42.52',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '6.33.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
244
245
246 def _split_innertube_client(client_name):
247 variant, *base = client_name.rsplit('.', 1)
248 if base:
249 return variant, base[0], variant
250 base, *variant = client_name.split('_', 1)
251 return client_name, base, variant[0] if variant else None
252
253
def short_client_name(client_name):
    """
    Build a short upper-case label for *client_name*.

    The label combines the first four characters of the leading name segment
    with the initial of every following underscore-separated segment
    ('embedscreen' counts as the two segments 'e' and 's').
    """
    full_name = _split_innertube_client(client_name)[0]
    main, *parts = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(part[0] for part in parts)
    return join_nonempty(main[:4], initials).upper()
257
258
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    For every configured client this fills in the default API key, host,
    REQUIRE_JS_PLAYER flag and 'hl' language, and assigns a 'priority' derived
    from the position of its base client in the preference order below.
    Clients without a variant additionally get a '{client}_embedscreen' copy.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot: embedscreen entries are inserted into the dict below
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg.setdefault('REQUIRE_JS_PLAYER', True)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain base client: derive its embed-screen sibling from the completed config
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg


# Run once at import time so the table is complete before any extractor uses it
build_innertube_clients()
288
289
@enum.unique
class BadgeType(enum.Enum):
    """Kinds of badge that the extractor recognises on YouTube renderers."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Status badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
298
299
300 class YoutubeBaseInfoExtractor(InfoExtractor):
301 """Provide base functions for Youtube extractors"""
302
303 _RESERVED_NAMES = (
304 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
305 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
306 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
307 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
308
309 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
310
311 # _NETRC_MACHINE = 'youtube'
312
313 # If True it will raise an error if no login info is provided
314 _LOGIN_REQUIRED = False
315
316 _INVIDIOUS_SITES = (
317 # invidious-redirect websites
318 r'(?:www\.)?redirect\.invidious\.io',
319 r'(?:(?:www|dev)\.)?invidio\.us',
320 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
321 r'(?:www\.)?invidious\.pussthecat\.org',
322 r'(?:www\.)?invidious\.zee\.li',
323 r'(?:www\.)?invidious\.ethibox\.fr',
324 r'(?:www\.)?iv\.ggtyler\.dev',
325 r'(?:www\.)?inv\.vern\.i2p',
326 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
327 r'(?:www\.)?inv\.riverside\.rocks',
328 r'(?:www\.)?invidious\.silur\.me',
329 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
330 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
331 r'(?:www\.)?invidious\.slipfox\.xyz',
332 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
333 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
334 r'(?:www\.)?invidious\.tiekoetter\.com',
335 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
336 r'(?:www\.)?invidious\.nerdvpn\.de',
337 r'(?:www\.)?invidious\.weblibre\.org',
338 r'(?:www\.)?inv\.odyssey346\.dev',
339 r'(?:www\.)?invidious\.dhusch\.de',
340 r'(?:www\.)?iv\.melmac\.space',
341 r'(?:www\.)?watch\.thekitty\.zone',
342 r'(?:www\.)?invidious\.privacydev\.net',
343 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
344 r'(?:www\.)?invidious\.drivet\.xyz',
345 r'(?:www\.)?vid\.priv\.au',
346 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
347 r'(?:www\.)?inv\.vern\.cc',
348 r'(?:www\.)?invidious\.esmailelbob\.xyz',
349 r'(?:www\.)?invidious\.sethforprivacy\.com',
350 r'(?:www\.)?yt\.oelrichsgarcia\.de',
351 r'(?:www\.)?yt\.artemislena\.eu',
352 r'(?:www\.)?invidious\.flokinet\.to',
353 r'(?:www\.)?invidious\.baczek\.me',
354 r'(?:www\.)?y\.com\.sb',
355 r'(?:www\.)?invidious\.epicsite\.xyz',
356 r'(?:www\.)?invidious\.lidarshield\.cloud',
357 r'(?:www\.)?yt\.funami\.tech',
358 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
359 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
360 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
361 # youtube-dl invidious instances list
362 r'(?:(?:www|no)\.)?invidiou\.sh',
363 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
364 r'(?:www\.)?invidious\.kabi\.tk',
365 r'(?:www\.)?invidious\.mastodon\.host',
366 r'(?:www\.)?invidious\.zapashcanon\.fr',
367 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
368 r'(?:www\.)?invidious\.tinfoil-hat\.net',
369 r'(?:www\.)?invidious\.himiko\.cloud',
370 r'(?:www\.)?invidious\.reallyancient\.tech',
371 r'(?:www\.)?invidious\.tube',
372 r'(?:www\.)?invidiou\.site',
373 r'(?:www\.)?invidious\.site',
374 r'(?:www\.)?invidious\.xyz',
375 r'(?:www\.)?invidious\.nixnet\.xyz',
376 r'(?:www\.)?invidious\.048596\.xyz',
377 r'(?:www\.)?invidious\.drycat\.fr',
378 r'(?:www\.)?inv\.skyn3t\.in',
379 r'(?:www\.)?tube\.poal\.co',
380 r'(?:www\.)?tube\.connect\.cafe',
381 r'(?:www\.)?vid\.wxzm\.sx',
382 r'(?:www\.)?vid\.mint\.lgbt',
383 r'(?:www\.)?vid\.puffyan\.us',
384 r'(?:www\.)?yewtu\.be',
385 r'(?:www\.)?yt\.elukerio\.org',
386 r'(?:www\.)?yt\.lelux\.fi',
387 r'(?:www\.)?invidious\.ggc-project\.de',
388 r'(?:www\.)?yt\.maisputain\.ovh',
389 r'(?:www\.)?ytprivate\.com',
390 r'(?:www\.)?invidious\.13ad\.de',
391 r'(?:www\.)?invidious\.toot\.koeln',
392 r'(?:www\.)?invidious\.fdn\.fr',
393 r'(?:www\.)?watch\.nettohikari\.com',
394 r'(?:www\.)?invidious\.namazso\.eu',
395 r'(?:www\.)?invidious\.silkky\.cloud',
396 r'(?:www\.)?invidious\.exonip\.de',
397 r'(?:www\.)?invidious\.riverside\.rocks',
398 r'(?:www\.)?invidious\.blamefran\.net',
399 r'(?:www\.)?invidious\.moomoo\.de',
400 r'(?:www\.)?ytb\.trom\.tf',
401 r'(?:www\.)?yt\.cyberhost\.uk',
402 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
403 r'(?:www\.)?qklhadlycap4cnod\.onion',
404 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
405 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
406 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
407 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
408 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
409 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
410 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
411 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
412 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
413 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
414 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
415 r'(?:www\.)?piped\.kavin\.rocks',
416 r'(?:www\.)?piped\.tokhmi\.xyz',
417 r'(?:www\.)?piped\.syncpundit\.io',
418 r'(?:www\.)?piped\.mha\.fi',
419 r'(?:www\.)?watch\.whatever\.social',
420 r'(?:www\.)?piped\.garudalinux\.org',
421 r'(?:www\.)?piped\.rivo\.lol',
422 r'(?:www\.)?piped-libre\.kavin\.rocks',
423 r'(?:www\.)?yt\.jae\.fi',
424 r'(?:www\.)?piped\.mint\.lgbt',
425 r'(?:www\.)?il\.ax',
426 r'(?:www\.)?piped\.esmailelbob\.xyz',
427 r'(?:www\.)?piped\.projectsegfau\.lt',
428 r'(?:www\.)?piped\.privacydev\.net',
429 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
430 r'(?:www\.)?piped\.smnz\.de',
431 r'(?:www\.)?piped\.adminforge\.de',
432 r'(?:www\.)?watch\.whatevertinfoil\.de',
433 r'(?:www\.)?piped\.qdi\.fi',
434 r'(?:(?:www|cf)\.)?piped\.video',
435 r'(?:www\.)?piped\.aeong\.one',
436 r'(?:www\.)?piped\.moomoo\.me',
437 r'(?:www\.)?piped\.chauvet\.pro',
438 r'(?:www\.)?watch\.leptons\.xyz',
439 r'(?:www\.)?pd\.vern\.cc',
440 r'(?:www\.)?piped\.hostux\.net',
441 r'(?:www\.)?piped\.lunar\.icu',
442 # Hyperpipe instances from https://hyperpipe.codeberg.page/
443 r'(?:www\.)?hyperpipe\.surge\.sh',
444 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
445 r'(?:www\.)?listen\.whatever\.social',
446 r'(?:www\.)?music\.adminforge\.de',
447 )
448
449 # extracted from account/account_menu ep
450 # XXX: These are the supported YouTube UI and API languages,
451 # which is slightly different from languages supported for translation in YouTube studio
452 _SUPPORTED_LANG_CODES = [
453 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
454 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
455 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
456 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
457 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
458 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
459 ]
460
461 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
462
463 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
464 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
465
def ucid_or_none(self, ucid):
    """Search *ucid* for a channel UC-id spanning the whole string (no-match default: None)."""
    whole_ucid_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(whole_ucid_re, ucid, 'UC-id', default=None)
468
def handle_or_none(self, handle):
    """Search *handle* for an @-handle spanning the whole string (no-match default: None)."""
    whole_handle_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(whole_handle_re, handle, '@-handle', default=None)
471
def handle_from_url(self, url):
    """Search for an @-handle at the start of a youtube.com URL or of a bare path (no-match default: None)."""
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
475
def ucid_from_url(self, url):
    """Search for a channel UC-id at the start of a youtube.com URL or of a bare path (no-match default: None)."""
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
479
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError (expected) when the configured code is not in
    _SUPPORTED_LANG_CODES; warns when a language other than 'en' is selected.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
497
def _initialize_consent(self):
    """
    Set the SOCS consent cookie to "accept all" when no decision is stored yet.

    Nothing is changed when a '__Secure-3PSID' cookie exists, or when a SOCS
    cookie exists whose value does not start with 'CAA' (the 'CAA' prefix is
    what the code treats as "not consented").
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return

    socs = cookies.get('SOCS')
    if not socs or socs.value.startswith('CAA'):
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
506
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so it carries the extraction language and UTC.

    Existing PREF values are kept where they can be parsed; 'hl' and 'tz' are
    always overwritten.
    """
    pref = {}
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Keep going with an empty PREF rather than aborting initialization
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
518
def _real_initialize(self):
    """Set up the PREF and consent cookies, then enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    # May raise via raise_login_required() when _LOGIN_REQUIRED is set and no cookies were passed
    self._check_login_required()
523
def _check_login_required(self):
    """Call raise_login_required() when this extractor needs login but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
527
528 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
529 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
530
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for *client*, so callers may modify it freely."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
533
def _get_innertube_host(self, client='web'):
    """Return the built-in INNERTUBE_HOST configured for *client*."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return builtin_cfg['INNERTUBE_HOST']
536
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get but with fallback to default ytcfg client values when present.

    The built-in config of *default_client* is consulted only when the lookup
    on *ytcfg* yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
541
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from *ytcfg*, falling back to the built-in *default_client* config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
546
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from *ytcfg*, falling back to the built-in *default_client* config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
551
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API hostname to talk to.

    Precedence: the 'innertube_host' extractor argument, then
    *req_api_hostname*, then the built-in host of *default_client*
    ('web' when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
555
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from *ytcfg*, falling back to the built-in *default_client* config."""
    return self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_API_KEY'],
        expected_type=str, default_client=default_client)
558
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context is looked up in *ytcfg* first and in the built-in
    *default_client* config otherwise. Its 'client' dict, when present, is
    updated in place with the extraction language and UTC timezone.
    NOTE(review): a context taken from *ytcfg* appears to be the caller's own
    object, so the caller would see these updates — confirm this is intended.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
566
567 _SAPISID = None
568
569 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
570 time_now = round(time.time())
571 if self._SAPISID is None:
572 yt_cookies = self._get_cookies('https://www.youtube.com')
573 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
574 # See: https://github.com/yt-dlp/yt-dlp/issues/393
575 sapisid_cookie = dict_get(
576 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
577 if sapisid_cookie and sapisid_cookie.value:
578 self._SAPISID = sapisid_cookie.value
579 self.write_debug('Extracted SAPISID cookie')
580 # SAPISID cookie is required if not already present
581 if not yt_cookies.get('SAPISID'):
582 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
583 self._set_cookie(
584 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
585 else:
586 self._SAPISID = False
587 if not self._SAPISID:
588 return None
589 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
590 sapisidhash = hashlib.sha1(
591 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
592 return f'SAPISIDHASH {time_now}_{sapisidhash}'
593
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST *query* to the innertube endpoint *ep* and return the decoded JSON.

    *context*, *api_key* and *api_hostname* default to values derived from
    *default_client*; the 'innertube_key' extractor argument overrides any
    API key. *headers* are applied on top of the generated API headers.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
611
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the JSON assigned to ytInitialData in *webpage*."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
614
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX (as int) found in the given ytcfgs, else None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
625
626 # Deprecated?
627 def _extract_identity_token(self, ytcfg=None, webpage=None):
628 if ytcfg:
629 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
630 if token:
631 return token
632 if webpage:
633 return self._search_regex(
634 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
635 'identity token', default=None, fatal=False)
636
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), str) or ''
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        channel_sid, *remainder = datasync_id.split('||')
        if remainder and remainder[0]:
            return channel_sid
655
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
665
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
669
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to ytcfg.set(...) in *webpage*.

    Returns an empty dict when there is no webpage, when no ytcfg.set call is
    found, or when parsing yields nothing.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
677
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit arguments take precedence; anything left unset is derived from
    *ytcfg* (or, for the client name, version and user agent, from the
    built-in *default_client* config). The result is passed through
    filter_dict() before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a sync id but no known session index, index 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)
703
704 def _download_ytcfg(self, client, video_id):
705 url = {
706 'web': 'https://www.youtube.com',
707 'web_music': 'https://music.youtube.com',
708 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
709 }.get(client)
710 if not url:
711 return {}
712 webpage = self._download_webpage(
713 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
714 return self.extract_ytcfg(video_id, webpage) or {}
715
716 @staticmethod
717 def _build_api_continuation_query(continuation, ctp=None):
718 query = {
719 'continuation': continuation
720 }
721 # TODO: Inconsistency with clickTrackingParams.
722 # Currently we have a fixed ctp contained within context (from ytcfg)
723 # and a ctp in root query for continuation.
724 if ctp:
725 query['clickTracking'] = {'clickTrackingParams': ctp}
726 return query
727
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from nextContinuationData / reloadContinuationData.

    Returns None when *renderer* has neither, or when the data carries no
    continuation token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))
740
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    continuation = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, continuation_ep.get('clickTrackingParams'))
750
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*.

    The next/reload continuation data is preferred; otherwise the
    continuationItemRenderer entries under 'contents', 'items' or 'rows' are
    searched for the first usable endpoint or button command.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return (
        cls._extract_next_continuation_data(renderer)
        or traverse_obj(
            renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data))
761
@classmethod
def _extract_alerts(cls, data):
    """
    Yield ``(alert_type, message)`` pairs for the alerts found in *data*.

    Every entry of ``data['alerts']`` is expected to be a dict mapping a
    renderer name to a renderer dict. Entries or renderers of any other type,
    and renderers without a type or without message text, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same tolerance as for the outer entries: a non-dict renderer
            # would otherwise raise AttributeError on .get() below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
774
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report the given ``(alert_type, message)`` pairs.

    With *fatal*, alerts of type 'error' are errors: the last one is raised as
    ExtractorError and the earlier ones are reported as warnings. All other
    alerts are reported as warnings unless their message is in
    _IGNORED_WARNINGS.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        if is_error:
            errors.append([alert_type, alert_message])
        elif alert_message not in self._IGNORED_WARNINGS:
            warnings.append([alert_type, alert_message])

    # Everything except the final error is only worth a warning
    demoted_errors = errors[:-1]
    for alert_type, alert_message in warnings + demoted_errors:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if not errors:
        return
    last_message = errors[-1][1]
    raise ExtractorError('YouTube said: %s' % last_message, expected=expected)
787
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and pass them to _report_alerts() with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
790
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    @returns [{'type': BadgeType}]

    A badge is classified by its icon type first, then by its style, and
    finally by matching known English words in its label or tooltip.
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    badges = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for badge in renderers:
        badge_type = (
            icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if not badge_type:
            # fallback, won't work in some languages
            label = traverse_obj(
                badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
                get_all=False, expected_type=str, default='').lower()
            # First label_map entry contained in the label wins
            badge_type = next(
                (mapped_type for needle, mapped_type in label_map.items() if needle in label), None)
        if badge_type:
            badges.append({'type': badge_type})

    return badges
842
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether any entry of `badges` has `badge_type` as its 'type' value."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
846
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` under any of the given paths.

    For every candidate object, its 'simpleText' string is preferred; otherwise
    the 'text' values of its 'runs' list (or of the candidate itself, if it is
    a list) are joined together.
    @param path_list: traversal paths tried in order; with no path, `data` itself is used
    @param max_runs:  if truthy, only this many leading runs are joined
    @returns          the text, or None if nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a path containing `...` or a list/tuple key is treated as
            # yielding several results; any other result is wrapped in a list
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined

    return None
868
def _get_count(self, data, *path_list):
    """
    Parse the text located by `_get_text(data, *path_list)` into a count.

    `parse_count` is tried first. If it yields None, the leading run of digits
    and commas (taken after removing all whitespace) is passed to `str_to_int`.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)
876
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Build a list of thumbnail dicts ('url', 'height', 'width') from data
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...))
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    # Keep only the part before the query string
                    url = url.split('?')[0]
                results.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return results
900
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time in a string and convert it with datetime_from_str
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    Returns None when the text contains no relative time, or when
    datetime_from_str raises ValueError for the extracted amount/unit.
    """

    # XXX: this could be moved to a general function in utils/_utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not found:
        return None

    # A bare keyword ('today', 'yesterday', 'now') is passed along as-is
    anchor = found.group('start')
    if anchor:
        return datetime_from_str(anchor)

    try:
        return datetime_from_str(f"now-{found.group('time')}{found.group('unit')}")
    except ValueError:
        return None
923
924 def _parse_time_text(self, text):
925 if not text:
926 return
927 dt_ = self.extract_relative_time(text)
928 timestamp = None
929 if isinstance(dt_, dt.datetime):
930 timestamp = calendar.timegm(dt_.timetuple())
931
932 if timestamp is None:
933 timestamp = (
934 unified_timestamp(text) or unified_timestamp(
935 self._search_regex(
936 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
937 text.lower(), 'time text', default=None)))
938
939 if text and timestamp is None and self._preferred_lang in (None, 'en'):
940 self.report_warning(
941 f'Cannot parse localized time text "{text}"', only_once=True)
942 return timestamp
943
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through `_call_api`, retrying on selected failures.

    Two RetryManagers are driven by hand: `main_rm` for network/HTTP errors and
    "unknown error" alerts, `icd_rm` for responses considered incomplete.

    @param item_id         passed to `_call_api` as `video_id` and used for JSON parsing messages
    @param query           passed through to `_call_api`
    @param note            passed through to `_call_api`
    @param headers         passed through to `_call_api`
    @param ytcfg           used to derive the context and API key for `default_client`
    @param check_get_keys  path(s) given to `traverse_obj(response, ...)`; a falsy
                           result marks the response as incomplete
    @param ep              API endpoint name
    @param fatal           forwarded to `_error_or_warning` for errors that are not retried
    @param api_hostname    passed through to `_call_api`
    @param default_client  client used for context/API key extraction and the API call
    @returns               the response; the result of `_error_or_warning` for errors
                           that are not retried; or None once the incomplete-data
                           retries stop without raising
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    # NOTE(review): next(main_retries) is called without a default, so it presumably
    # raises once the (fatal) manager is out of retries - confirm against RetryManager
    while True:
        try:
            # fatal=True so that every API failure surfaces as ExtractorError here
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                # Network error without an HTTP response: retry
                main_rm.error = e
                next(main_retries)
                continue

            # HTTPError: peek at the body to surface an error message if it is not HTML
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            # This manager may be non fatal, so its result is checked explicitly
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
1010
@staticmethod
def is_music_url(url):
    """Tell whether `url` starts with the music.youtube.com host (the http(s) scheme is optional)."""
    return bool(re.match(r'(https?://)?music\.youtube\.com/', url))
1014
def _extract_video(self, renderer):
    """
    Convert a video renderer into a 'url'-type result dict handled by YoutubeIE.

    Metadata is read from the renderer itself; for some fields the header
    renderer nested under its reel watch endpoint serves as a fallback.
    The view count is stored under 'concurrent_view_count' for live and
    upcoming videos and under 'view_count' otherwise.
    """
    video_id = renderer.get('videoId')

    # Header renderer nested under the reel watch endpoint; fallback source below
    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length texts, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    raw_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not raw_channel_id:
        raw_channel_id = traverse_obj(
            reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(raw_channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    # Derived values, computed in the same order as they appear in the result
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The time text is only converted to a timestamp when 'approximate_date' is configured
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    channel_is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': channel_is_verified,
    }
1109
1110
1111 class YoutubeIE(YoutubeBaseInfoExtractor):
1112 IE_DESC = 'YouTube'
1113 _VALID_URL = r"""(?x)^
1114 (
1115 (?:https?://|//) # http(s):// or protocol-independent URL
1116 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1117 (?:www\.)?deturl\.com/www\.youtube\.com|
1118 (?:www\.)?pwnyoutube\.com|
1119 (?:www\.)?hooktube\.com|
1120 (?:www\.)?yourepeat\.com|
1121 tube\.majestyc\.net|
1122 %(invidious)s|
1123 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1124 (?:.*?\#/)? # handle anchor (#/) redirect urls
1125 (?: # the various things that can precede the ID:
1126 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1127 |(?: # or the v= param in all its forms
1128 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1129 (?:\?|\#!?) # the params delimiter ? or # or #!
1130 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1131 v=
1132 )
1133 ))
1134 |(?:
1135 youtu\.be| # just youtu.be/xxxx
1136 vid\.plus| # or vid.plus/xxxx
1137 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1138 %(invidious)s
1139 )/
1140 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1141 )
1142 )? # all until now is optional -> you can pass the naked ID
1143 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1144 (?(1).+)? # if we found the ID, everything can follow
1145 (?:\#|$)""" % {
1146 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1147 }
1148 _EMBED_REGEX = [
1149 r'''(?x)
1150 (?:
1151 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1152 data-video-url=|
1153 <embed[^>]+?src=|
1154 embedSWF\(?:\s*|
1155 <object[^>]+data=|
1156 new\s+SWFObject\(
1157 )
1158 (["\'])
1159 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1160 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1161 \1''',
1162 # https://wordpress.org/plugins/lazy-load-for-videos/
1163 r'''(?xs)
1164 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1165 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1166 ]
1167 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1168
1169 _PLAYER_INFO_RE = (
1170 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1171 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1172 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1173 )
1174 _formats = {
1175 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1176 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1177 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1178 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1179 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1180 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1181 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1182 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1183 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1184 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1185 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1186 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1187 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1188 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1189 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1190 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1191 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1192 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1193
1194
1195 # 3D videos
1196 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1197 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1198 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1199 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1200 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1201 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1202 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1203
1204 # Apple HTTP Live Streaming
1205 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1206 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1207 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1208 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1209 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1210 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1211 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1212 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1213
1214 # DASH mp4 video
1215 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1216 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1217 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1218 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1219 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1220 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1221 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1222 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1223 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1224 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1225 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1226 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1227
1228 # Dash mp4 audio
1229 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1230 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1231 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1232 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1233 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1234 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1235 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1236
1237 # Dash webm
1238 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1239 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1240 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1241 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1242 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1243 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1244 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1245 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1246 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1247 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1248 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1249 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1250 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1251 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1252 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1253 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1254 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1255 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1256 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1257 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1258 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1260
1261 # Dash webm audio
1262 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1263 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1264
1265 # Dash webm audio with opus inside
1266 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1267 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1268 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1269
1270 # RTMP (unnamed)
1271 '_rtmp': {'protocol': 'rtmp'},
1272
1273 # av01 video only formats sometimes served with "unknown" codecs
1274 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1275 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1276 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1277 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1278 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1279 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1280 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1281 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1282 }
1283 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1284
1285 _GEO_BYPASS = False
1286
1287 IE_NAME = 'youtube'
1288 _TESTS = [
1289 {
1290 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1291 'info_dict': {
1292 'id': 'BaW_jenozKc',
1293 'ext': 'mp4',
1294 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1295 'channel': 'Philipp Hagemeister',
1296 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1297 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1298 'upload_date': '20121002',
1299 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1300 'categories': ['Science & Technology'],
1301 'tags': ['youtube-dl'],
1302 'duration': 10,
1303 'view_count': int,
1304 'like_count': int,
1305 'availability': 'public',
1306 'playable_in_embed': True,
1307 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1308 'live_status': 'not_live',
1309 'age_limit': 0,
1310 'start_time': 1,
1311 'end_time': 9,
1312 'comment_count': int,
1313 'channel_follower_count': int,
1314 'uploader': 'Philipp Hagemeister',
1315 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1316 'uploader_id': '@PhilippHagemeister',
1317 'heatmap': 'count:100',
1318 }
1319 },
1320 {
1321 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1322 'note': 'Embed-only video (#1746)',
1323 'info_dict': {
1324 'id': 'yZIXLfi8CZQ',
1325 'ext': 'mp4',
1326 'upload_date': '20120608',
1327 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1328 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1329 'age_limit': 18,
1330 },
1331 'skip': 'Private video',
1332 },
1333 {
1334 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1335 'note': 'Use the first video ID in the URL',
1336 'info_dict': {
1337 'id': 'BaW_jenozKc',
1338 'ext': 'mp4',
1339 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1340 'channel': 'Philipp Hagemeister',
1341 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1342 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1343 'upload_date': '20121002',
1344 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1345 'categories': ['Science & Technology'],
1346 'tags': ['youtube-dl'],
1347 'duration': 10,
1348 'view_count': int,
1349 'like_count': int,
1350 'availability': 'public',
1351 'playable_in_embed': True,
1352 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1353 'live_status': 'not_live',
1354 'age_limit': 0,
1355 'comment_count': int,
1356 'channel_follower_count': int,
1357 'uploader': 'Philipp Hagemeister',
1358 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1359 'uploader_id': '@PhilippHagemeister',
1360 'heatmap': 'count:100',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 },
1366 {
1367 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1368 'note': '256k DASH audio (format 141) via DASH manifest',
1369 'info_dict': {
1370 'id': 'a9LDPn-MO4I',
1371 'ext': 'm4a',
1372 'upload_date': '20121002',
1373 'description': '',
1374 'title': 'UHDTV TEST 8K VIDEO.mp4'
1375 },
1376 'params': {
1377 'youtube_include_dash_manifest': True,
1378 'format': '141',
1379 },
1380 'skip': 'format 141 not served anymore',
1381 },
1382 # DASH manifest with encrypted signature
1383 {
1384 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1385 'info_dict': {
1386 'id': 'IB3lcPjvWLA',
1387 'ext': 'm4a',
1388 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1389 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1390 'duration': 244,
1391 'upload_date': '20131011',
1392 'abr': 129.495,
1393 'like_count': int,
1394 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1395 'playable_in_embed': True,
1396 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1397 'view_count': int,
1398 'track': 'The Spark',
1399 'live_status': 'not_live',
1400 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1401 'channel': 'Afrojack',
1402 'tags': 'count:19',
1403 'availability': 'public',
1404 'categories': ['Music'],
1405 'age_limit': 0,
1406 'alt_title': 'The Spark',
1407 'channel_follower_count': int,
1408 'uploader': 'Afrojack',
1409 'uploader_url': 'https://www.youtube.com/@Afrojack',
1410 'uploader_id': '@Afrojack',
1411 },
1412 'params': {
1413 'youtube_include_dash_manifest': True,
1414 'format': '141/bestaudio[ext=m4a]',
1415 },
1416 },
1417 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1418 {
1419 'note': 'Embed allowed age-gate video',
1420 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1421 'info_dict': {
1422 'id': 'HtVdAasjOgU',
1423 'ext': 'mp4',
1424 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1425 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1426 'duration': 142,
1427 'upload_date': '20140605',
1428 'age_limit': 18,
1429 'categories': ['Gaming'],
1430 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1431 'availability': 'needs_auth',
1432 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1433 'like_count': int,
1434 'channel': 'The Witcher',
1435 'live_status': 'not_live',
1436 'tags': 'count:17',
1437 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1438 'playable_in_embed': True,
1439 'view_count': int,
1440 'channel_follower_count': int,
1441 'uploader': 'The Witcher',
1442 'uploader_url': 'https://www.youtube.com/@thewitcher',
1443 'uploader_id': '@thewitcher',
1444 'comment_count': int,
1445 'channel_is_verified': True,
1446 'heatmap': 'count:100',
1447 },
1448 },
1449 {
1450 'note': 'Age-gate video with embed allowed in public site',
1451 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1452 'info_dict': {
1453 'id': 'HsUATh_Nc2U',
1454 'ext': 'mp4',
1455 'title': 'Godzilla 2 (Official Video)',
1456 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1457 'upload_date': '20200408',
1458 'age_limit': 18,
1459 'availability': 'needs_auth',
1460 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1461 'channel': 'FlyingKitty',
1462 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1463 'view_count': int,
1464 'categories': ['Entertainment'],
1465 'live_status': 'not_live',
1466 'tags': ['Flyingkitty', 'godzilla 2'],
1467 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1468 'like_count': int,
1469 'duration': 177,
1470 'playable_in_embed': True,
1471 'channel_follower_count': int,
1472 'uploader': 'FlyingKitty',
1473 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1474 'uploader_id': '@FlyingKitty900',
1475 'comment_count': int,
1476 'channel_is_verified': True,
1477 },
1478 },
1479 {
1480 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1481 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1482 'info_dict': {
1483 'id': 'Tq92D6wQ1mg',
1484 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1485 'ext': 'mp4',
1486 'upload_date': '20191228',
1487 'description': 'md5:17eccca93a786d51bc67646756894066',
1488 'age_limit': 18,
1489 'like_count': int,
1490 'availability': 'needs_auth',
1491 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1492 'view_count': int,
1493 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1494 'channel': 'Projekt Melody',
1495 'live_status': 'not_live',
1496 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1497 'playable_in_embed': True,
1498 'categories': ['Entertainment'],
1499 'duration': 106,
1500 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1501 'comment_count': int,
1502 'channel_follower_count': int,
1503 'uploader': 'Projekt Melody',
1504 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1505 'uploader_id': '@ProjektMelody',
1506 },
1507 },
1508 {
1509 'note': 'Non-Agegated non-embeddable video',
1510 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1511 'info_dict': {
1512 'id': 'MeJVWBSsPAY',
1513 'ext': 'mp4',
1514 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1515 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1516 'upload_date': '20130730',
1517 'track': 'Such mich find mich',
1518 'age_limit': 0,
1519 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1520 'like_count': int,
1521 'playable_in_embed': False,
1522 'creator': 'OOMPH!',
1523 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1524 'view_count': int,
1525 'alt_title': 'Such mich find mich',
1526 'duration': 210,
1527 'channel': 'Herr Lurik',
1528 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1529 'categories': ['Music'],
1530 'availability': 'public',
1531 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1532 'live_status': 'not_live',
1533 'artist': 'OOMPH!',
1534 'channel_follower_count': int,
1535 'uploader': 'Herr Lurik',
1536 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1537 'uploader_id': '@HerrLurik',
1538 },
1539 },
1540 {
1541 'note': 'Non-bypassable age-gated video',
1542 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1543 'only_matching': True,
1544 },
1545 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1546 # YouTube Red ad is not captured for creator
1547 {
1548 'url': '__2ABJjxzNo',
1549 'info_dict': {
1550 'id': '__2ABJjxzNo',
1551 'ext': 'mp4',
1552 'duration': 266,
1553 'upload_date': '20100430',
1554 'creator': 'deadmau5',
1555 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1556 'title': 'Deadmau5 - Some Chords (HD)',
1557 'alt_title': 'Some Chords',
1558 'availability': 'public',
1559 'tags': 'count:14',
1560 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1561 'view_count': int,
1562 'live_status': 'not_live',
1563 'channel': 'deadmau5',
1564 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1565 'like_count': int,
1566 'track': 'Some Chords',
1567 'artist': 'deadmau5',
1568 'playable_in_embed': True,
1569 'age_limit': 0,
1570 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1571 'categories': ['Music'],
1572 'album': 'Some Chords',
1573 'channel_follower_count': int,
1574 'uploader': 'deadmau5',
1575 'uploader_url': 'https://www.youtube.com/@deadmau5',
1576 'uploader_id': '@deadmau5',
1577 },
1578 'expected_warnings': [
1579 'DASH manifest missing',
1580 ]
1581 },
1582 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1583 {
1584 'url': 'lqQg6PlCWgI',
1585 'info_dict': {
1586 'id': 'lqQg6PlCWgI',
1587 'ext': 'mp4',
1588 'duration': 6085,
1589 'upload_date': '20150827',
1590 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1591 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1592 'like_count': int,
1593 'release_timestamp': 1343767800,
1594 'playable_in_embed': True,
1595 'categories': ['Sports'],
1596 'release_date': '20120731',
1597 'channel': 'Olympics',
1598 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1599 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1600 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1601 'age_limit': 0,
1602 'availability': 'public',
1603 'live_status': 'was_live',
1604 'view_count': int,
1605 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1606 'channel_follower_count': int,
1607 'uploader': 'Olympics',
1608 'uploader_url': 'https://www.youtube.com/@Olympics',
1609 'uploader_id': '@Olympics',
1610 'channel_is_verified': True,
1611 },
1612 'params': {
1613 'skip_download': 'requires avconv',
1614 }
1615 },
1616 # Non-square pixels
1617 {
1618 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1619 'info_dict': {
1620 'id': '_b-2C3KPAM0',
1621 'ext': 'mp4',
1622 'stretched_ratio': 16 / 9.,
1623 'duration': 85,
1624 'upload_date': '20110310',
1625 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1626 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1627 'playable_in_embed': True,
1628 'channel': '孫ᄋᄅ',
1629 'age_limit': 0,
1630 'tags': 'count:11',
1631 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1632 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1633 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1634 'view_count': int,
1635 'categories': ['People & Blogs'],
1636 'like_count': int,
1637 'live_status': 'not_live',
1638 'availability': 'unlisted',
1639 'comment_count': int,
1640 'channel_follower_count': int,
1641 'uploader': '孫ᄋᄅ',
1642 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1643 'uploader_id': '@AllenMeow',
1644 },
1645 },
1646 # url_encoded_fmt_stream_map is empty string
1647 {
1648 'url': 'qEJwOuvDf7I',
1649 'info_dict': {
1650 'id': 'qEJwOuvDf7I',
1651 'ext': 'webm',
1652 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1653 'description': '',
1654 'upload_date': '20150404',
1655 },
1656 'params': {
1657 'skip_download': 'requires avconv',
1658 },
1659 'skip': 'This live event has ended.',
1660 },
1661 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1662 {
1663 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1664 'info_dict': {
1665 'id': 'FIl7x6_3R5Y',
1666 'ext': 'webm',
1667 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1668 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1669 'duration': 220,
1670 'upload_date': '20150625',
1671 'formats': 'mincount:31',
1672 },
1673 'skip': 'not actual anymore',
1674 },
1675 # DASH manifest with segment_list
1676 {
1677 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1678 'md5': '8ce563a1d667b599d21064e982ab9e31',
1679 'info_dict': {
1680 'id': 'CsmdDsKjzN8',
1681 'ext': 'mp4',
1682 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1683 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1684 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1685 },
1686 'params': {
1687 'youtube_include_dash_manifest': True,
1688 'format': '135', # bestvideo
1689 },
1690 'skip': 'This live event has ended.',
1691 },
1692 {
1693 # Multifeed videos (multiple cameras), URL can be of any Camera
1694 # TODO: fix multifeed titles
1695 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1696 'info_dict': {
1697 'id': 'zaPI8MvL8pg',
1698 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1699 'description': 'md5:563ccbc698b39298481ca3c571169519',
1700 },
1701 'playlist': [{
1702 'info_dict': {
1703 'id': 'j5yGuxZ8lLU',
1704 'ext': 'mp4',
1705 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1706 'description': 'md5:563ccbc698b39298481ca3c571169519',
1707 'duration': 10120,
1708 'channel_follower_count': int,
1709 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1710 'availability': 'public',
1711 'playable_in_embed': True,
1712 'upload_date': '20131105',
1713 'categories': ['Gaming'],
1714 'live_status': 'was_live',
1715 'tags': 'count:24',
1716 'release_timestamp': 1383701910,
1717 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1718 'comment_count': int,
1719 'age_limit': 0,
1720 'like_count': int,
1721 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1722 'channel': 'WiiLikeToPlay',
1723 'view_count': int,
1724 'release_date': '20131106',
1725 'uploader': 'WiiLikeToPlay',
1726 'uploader_id': '@WLTP',
1727 'uploader_url': 'https://www.youtube.com/@WLTP',
1728 },
1729 }, {
1730 'info_dict': {
1731 'id': 'zaPI8MvL8pg',
1732 'ext': 'mp4',
1733 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1734 'availability': 'public',
1735 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1736 'channel': 'WiiLikeToPlay',
1737 'channel_follower_count': int,
1738 'description': 'md5:563ccbc698b39298481ca3c571169519',
1739 'duration': 10108,
1740 'age_limit': 0,
1741 'like_count': int,
1742 'tags': 'count:24',
1743 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1744 'release_timestamp': 1383701915,
1745 'comment_count': int,
1746 'upload_date': '20131105',
1747 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1748 'release_date': '20131106',
1749 'playable_in_embed': True,
1750 'live_status': 'was_live',
1751 'categories': ['Gaming'],
1752 'view_count': int,
1753 'uploader': 'WiiLikeToPlay',
1754 'uploader_id': '@WLTP',
1755 'uploader_url': 'https://www.youtube.com/@WLTP',
1756 },
1757 }, {
1758 'info_dict': {
1759 'id': 'R7r3vfO7Hao',
1760 'ext': 'mp4',
1761 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1762 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1763 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1764 'like_count': int,
1765 'availability': 'public',
1766 'playable_in_embed': True,
1767 'upload_date': '20131105',
1768 'description': 'md5:563ccbc698b39298481ca3c571169519',
1769 'channel_follower_count': int,
1770 'tags': 'count:24',
1771 'release_date': '20131106',
1772 'comment_count': int,
1773 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1774 'channel': 'WiiLikeToPlay',
1775 'categories': ['Gaming'],
1776 'release_timestamp': 1383701914,
1777 'live_status': 'was_live',
1778 'age_limit': 0,
1779 'duration': 10128,
1780 'view_count': int,
1781 'uploader': 'WiiLikeToPlay',
1782 'uploader_id': '@WLTP',
1783 'uploader_url': 'https://www.youtube.com/@WLTP',
1784 },
1785 }],
1786 'params': {'skip_download': True},
1787 },
1788 {
1789 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1790 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1791 'info_dict': {
1792 'id': 'gVfLd0zydlo',
1793 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1794 },
1795 'playlist_count': 2,
1796 'skip': 'Not multifeed anymore',
1797 },
1798 {
1799 'url': 'https://vid.plus/FlRa-iH7PGw',
1800 'only_matching': True,
1801 },
1802 {
1803 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1804 'only_matching': True,
1805 },
1806 {
1807 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1808 # Also tests cut-off URL expansion in video description (see
1809 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1810 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1811 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1812 'info_dict': {
1813 'id': 'lsguqyKfVQg',
1814 'ext': 'mp4',
1815 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1816 'alt_title': 'Dark Walk',
1817 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1818 'duration': 133,
1819 'upload_date': '20151119',
1820 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1821 'track': 'Dark Walk',
1822 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1823 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1824 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1825 'categories': ['Film & Animation'],
1826 'view_count': int,
1827 'live_status': 'not_live',
1828 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1829 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1830 'tags': 'count:13',
1831 'availability': 'public',
1832 'channel': 'IronSoulElf',
1833 'playable_in_embed': True,
1834 'like_count': int,
1835 'age_limit': 0,
1836 'channel_follower_count': int
1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
1842 {
1843 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1844 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1845 'only_matching': True,
1846 },
1847 {
1848 # Video with yt:stretch=17:0
1849 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1850 'info_dict': {
1851 'id': 'Q39EVAstoRM',
1852 'ext': 'mp4',
1853 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1854 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1855 'upload_date': '20151107',
1856 },
1857 'params': {
1858 'skip_download': True,
1859 },
1860 'skip': 'This video does not exist.',
1861 },
1862 {
1863 # Video with incomplete 'yt:stretch=16:'
1864 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1865 'only_matching': True,
1866 },
1867 {
1868 # Video licensed under Creative Commons
1869 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1870 'info_dict': {
1871 'id': 'M4gD1WSo5mA',
1872 'ext': 'mp4',
1873 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1874 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1875 'duration': 721,
1876 'upload_date': '20150128',
1877 'license': 'Creative Commons Attribution license (reuse allowed)',
1878 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1879 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1880 'like_count': int,
1881 'age_limit': 0,
1882 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1883 'channel': 'The Berkman Klein Center for Internet & Society',
1884 'availability': 'public',
1885 'view_count': int,
1886 'categories': ['Education'],
1887 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1888 'live_status': 'not_live',
1889 'playable_in_embed': True,
1890 'channel_follower_count': int,
1891 'chapters': list,
1892 'uploader': 'The Berkman Klein Center for Internet & Society',
1893 'uploader_id': '@BKCHarvard',
1894 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1895 },
1896 'params': {
1897 'skip_download': True,
1898 },
1899 },
1900 {
1901 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1902 'info_dict': {
1903 'id': 'eQcmzGIKrzg',
1904 'ext': 'mp4',
1905 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1906 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1907 'duration': 4060,
1908 'upload_date': '20151120',
1909 'license': 'Creative Commons Attribution license (reuse allowed)',
1910 'playable_in_embed': True,
1911 'tags': 'count:12',
1912 'like_count': int,
1913 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1914 'age_limit': 0,
1915 'availability': 'public',
1916 'categories': ['News & Politics'],
1917 'channel': 'Bernie Sanders',
1918 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1919 'view_count': int,
1920 'live_status': 'not_live',
1921 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1922 'comment_count': int,
1923 'channel_follower_count': int,
1924 'chapters': list,
1925 'uploader': 'Bernie Sanders',
1926 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1927 'uploader_id': '@BernieSanders',
1928 'channel_is_verified': True,
1929 'heatmap': 'count:100',
1930 },
1931 'params': {
1932 'skip_download': True,
1933 },
1934 },
1935 {
1936 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1937 'only_matching': True,
1938 },
1939 {
1940 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1941 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1942 'only_matching': True,
1943 },
1944 {
1945 # Rental video preview
1946 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1947 'info_dict': {
1948 'id': 'uGpuVWrhIzE',
1949 'ext': 'mp4',
1950 'title': 'Piku - Trailer',
1951 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1952 'upload_date': '20150811',
1953 'license': 'Standard YouTube License',
1954 },
1955 'params': {
1956 'skip_download': True,
1957 },
1958 'skip': 'This video is not available.',
1959 },
1960 {
1961 # YouTube Red video with episode data
1962 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1963 'info_dict': {
1964 'id': 'iqKdEhx-dD4',
1965 'ext': 'mp4',
1966 'title': 'Isolation - Mind Field (Ep 1)',
1967 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1968 'duration': 2085,
1969 'upload_date': '20170118',
1970 'series': 'Mind Field',
1971 'season_number': 1,
1972 'episode_number': 1,
1973 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1974 'tags': 'count:12',
1975 'view_count': int,
1976 'availability': 'public',
1977 'age_limit': 0,
1978 'channel': 'Vsauce',
1979 'episode': 'Episode 1',
1980 'categories': ['Entertainment'],
1981 'season': 'Season 1',
1982 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1983 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1984 'like_count': int,
1985 'playable_in_embed': True,
1986 'live_status': 'not_live',
1987 'channel_follower_count': int,
1988 'uploader': 'Vsauce',
1989 'uploader_url': 'https://www.youtube.com/@Vsauce',
1990 'uploader_id': '@Vsauce',
1991 'comment_count': int,
1992 'channel_is_verified': True,
1993 },
1994 'params': {
1995 'skip_download': True,
1996 },
1997 'expected_warnings': [
1998 'Skipping DASH manifest',
1999 ],
2000 },
2001 {
2002 # The following content has been identified by the YouTube community
2003 # as inappropriate or offensive to some audiences.
2004 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2005 'info_dict': {
2006 'id': '6SJNVb0GnPI',
2007 'ext': 'mp4',
2008 'title': 'Race Differences in Intelligence',
2009 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2010 'duration': 965,
2011 'upload_date': '20140124',
2012 },
2013 'params': {
2014 'skip_download': True,
2015 },
2016 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2017 },
2018 {
2019 # itag 212
2020 'url': '1t24XAntNCY',
2021 'only_matching': True,
2022 },
2023 {
2024 # geo restricted to JP
2025 'url': 'sJL6WA-aGkQ',
2026 'only_matching': True,
2027 },
2028 {
2029 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2030 'only_matching': True,
2031 },
2032 {
2033 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2034 'only_matching': True,
2035 },
2036 {
2037 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2038 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2039 'only_matching': True,
2040 },
2041 {
2042 # DRM protected
2043 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2044 'only_matching': True,
2045 },
2046 {
2047 # Video with unsupported adaptive stream type formats
2048 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2049 'info_dict': {
2050 'id': 'Z4Vy8R84T1U',
2051 'ext': 'mp4',
2052 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2053 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2054 'duration': 433,
2055 'upload_date': '20130923',
2056 'formats': 'maxcount:10',
2057 },
2058 'params': {
2059 'skip_download': True,
2060 'youtube_include_dash_manifest': False,
2061 },
2062 'skip': 'not actual anymore',
2063 },
2064 {
2065 # Youtube Music Auto-generated description
2066 # TODO: fix metadata extraction
2067 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2068 'info_dict': {
2069 'id': 'MgNrAu2pzNs',
2070 'ext': 'mp4',
2071 'title': 'Voyeur Girl',
2072 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2073 'upload_date': '20190312',
2074 'artists': ['Stephen'],
2075 'creators': ['Stephen'],
2076 'track': 'Voyeur Girl',
2077 'album': 'it\'s too much love to know my dear',
2078 'release_date': '20190313',
2079 'alt_title': 'Voyeur Girl',
2080 'view_count': int,
2081 'playable_in_embed': True,
2082 'like_count': int,
2083 'categories': ['Music'],
2084 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2085 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2086 'uploader': 'Stephen',
2087 'availability': 'public',
2088 'duration': 169,
2089 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2090 'age_limit': 0,
2091 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2092 'tags': 'count:11',
2093 'live_status': 'not_live',
2094 'channel_follower_count': int
2095 },
2096 'params': {
2097 'skip_download': True,
2098 },
2099 },
2100 {
2101 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2102 'only_matching': True,
2103 },
2104 {
2105 # invalid -> valid video id redirection
2106 'url': 'DJztXj2GPfl',
2107 'info_dict': {
2108 'id': 'DJztXj2GPfk',
2109 'ext': 'mp4',
2110 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2111 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2112 'upload_date': '20090125',
2113 'artist': 'Panjabi MC',
2114 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2115 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2116 },
2117 'params': {
2118 'skip_download': True,
2119 },
2120 'skip': 'Video unavailable',
2121 },
2122 {
2123 # empty description results in an empty string
2124 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2125 'info_dict': {
2126 'id': 'x41yOUIvK2k',
2127 'ext': 'mp4',
2128 'title': 'IMG 3456',
2129 'description': '',
2130 'upload_date': '20170613',
2131 'view_count': int,
2132 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2133 'like_count': int,
2134 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2135 'tags': [],
2136 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2137 'availability': 'public',
2138 'age_limit': 0,
2139 'categories': ['Pets & Animals'],
2140 'duration': 7,
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
2143 'channel': 'l\'Or Vert asbl',
2144 'channel_follower_count': int,
2145 'uploader': 'l\'Or Vert asbl',
2146 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2147 'uploader_id': '@ElevageOrVert',
2148 },
2149 'params': {
2150 'skip_download': True,
2151 },
2152 },
2153 {
2154 # with '};' inside yt initial data (see [1])
2155 # see [2] for an example with '};' inside ytInitialPlayerResponse
2156 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2157 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2158 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2159 'info_dict': {
2160 'id': 'CHqg6qOn4no',
2161 'ext': 'mp4',
2162 'title': 'Part 77 Sort a list of simple types in c#',
2163 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2164 'upload_date': '20130831',
2165 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2166 'like_count': int,
2167 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2168 'live_status': 'not_live',
2169 'categories': ['Education'],
2170 'availability': 'public',
2171 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2172 'tags': 'count:12',
2173 'playable_in_embed': True,
2174 'age_limit': 0,
2175 'view_count': int,
2176 'duration': 522,
2177 'channel': 'kudvenkat',
2178 'comment_count': int,
2179 'channel_follower_count': int,
2180 'chapters': list,
2181 'uploader': 'kudvenkat',
2182 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2183 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2184 'channel_is_verified': True,
2185 'heatmap': 'count:100',
2186 },
2187 'params': {
2188 'skip_download': True,
2189 },
2190 },
2191 {
2192 # another example of '};' in ytInitialData
2193 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2194 'only_matching': True,
2195 },
2196 {
2197 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2198 'only_matching': True,
2199 },
2200 {
2201 # https://github.com/ytdl-org/youtube-dl/pull/28094
2202 'url': 'OtqTfy26tG0',
2203 'info_dict': {
2204 'id': 'OtqTfy26tG0',
2205 'ext': 'mp4',
2206 'title': 'Burn Out',
2207 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2208 'upload_date': '20141120',
2209 'artist': 'The Cinematic Orchestra',
2210 'track': 'Burn Out',
2211 'album': 'Every Day',
2212 'like_count': int,
2213 'live_status': 'not_live',
2214 'alt_title': 'Burn Out',
2215 'duration': 614,
2216 'age_limit': 0,
2217 'view_count': int,
2218 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2219 'creator': 'The Cinematic Orchestra',
2220 'channel': 'The Cinematic Orchestra',
2221 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2222 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2223 'availability': 'public',
2224 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2225 'categories': ['Music'],
2226 'playable_in_embed': True,
2227 'channel_follower_count': int,
2228 'uploader': 'The Cinematic Orchestra',
2229 'comment_count': int,
2230 },
2231 'params': {
2232 'skip_download': True,
2233 },
2234 },
2235 {
2236 # controversial video, only works with bpctr when authenticated with cookies
2237 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2238 'only_matching': True,
2239 },
2240 {
2241 # controversial video, requires bpctr/contentCheckOk
2242 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2243 'info_dict': {
2244 'id': 'SZJvDhaSDnc',
2245 'ext': 'mp4',
2246 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2247 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2248 'upload_date': '20140716',
2249 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2250 'duration': 170,
2251 'categories': ['News & Politics'],
2252 'view_count': int,
2253 'channel': 'CBS Mornings',
2254 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2255 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2256 'age_limit': 18,
2257 'availability': 'needs_auth',
2258 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2259 'like_count': int,
2260 'live_status': 'not_live',
2261 'playable_in_embed': True,
2262 'channel_follower_count': int,
2263 'uploader': 'CBS Mornings',
2264 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2265 'uploader_id': '@CBSMornings',
2266 'comment_count': int,
2267 'channel_is_verified': True,
2268 }
2269 },
2270 {
2271 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2272 'url': 'cBvYw8_A0vQ',
2273 'info_dict': {
2274 'id': 'cBvYw8_A0vQ',
2275 'ext': 'mp4',
2276 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2277 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2278 'upload_date': '20201120',
2279 'duration': 1456,
2280 'categories': ['Travel & Events'],
2281 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2282 'view_count': int,
2283 'channel': 'Walk around Japan',
2284 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2285 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2286 'age_limit': 0,
2287 'availability': 'public',
2288 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2289 'live_status': 'not_live',
2290 'playable_in_embed': True,
2291 'channel_follower_count': int,
2292 'uploader': 'Walk around Japan',
2293 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2294 'uploader_id': '@walkaroundjapan7124',
2295 },
2296 'params': {
2297 'skip_download': True,
2298 },
2299 }, {
2300 # Has multiple audio streams
2301 'url': 'WaOKSUlf4TM',
2302 'only_matching': True
2303 }, {
2304 # Requires Premium: has format 141 when requested using YTM url
2305 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2306 'only_matching': True
2307 }, {
2308 # multiple subtitles with same lang_code
2309 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2310 'only_matching': True,
2311 }, {
2312 # Force use android client fallback
2313 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2314 'info_dict': {
2315 'id': 'YOelRv7fMxY',
2316 'title': 'DIGGING A SECRET TUNNEL Part 1',
2317 'ext': '3gp',
2318 'upload_date': '20210624',
2319 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2320 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2321 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2322 'duration': 596,
2323 'categories': ['Entertainment'],
2324 'view_count': int,
2325 'channel': 'colinfurze',
2326 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2327 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2328 'age_limit': 0,
2329 'availability': 'public',
2330 'like_count': int,
2331 'live_status': 'not_live',
2332 'playable_in_embed': True,
2333 'channel_follower_count': int,
2334 'chapters': list,
2335 'uploader': 'colinfurze',
2336 'uploader_url': 'https://www.youtube.com/@colinfurze',
2337 'uploader_id': '@colinfurze',
2338 'comment_count': int,
2339 'channel_is_verified': True,
2340 'heatmap': 'count:100',
2341 },
2342 'params': {
2343 'format': '17', # 3gp format available on android
2344 'extractor_args': {'youtube': {'player_client': ['android']}},
2345 },
2346 },
2347 {
2348 # Skip download of additional client configs (remix client config in this case)
2349 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2350 'only_matching': True,
2351 'params': {
2352 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2353 },
2354 }, {
2355 # shorts
2356 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2357 'only_matching': True,
2358 }, {
2359 'note': 'Storyboards',
2360 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2361 'info_dict': {
2362 'id': '5KLPxDtMqe8',
2363 'ext': 'mhtml',
2364 'format_id': 'sb0',
2365 'title': 'Your Brain is Plastic',
2366 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2367 'upload_date': '20140324',
2368 'like_count': int,
2369 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2370 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2371 'view_count': int,
2372 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2373 'playable_in_embed': True,
2374 'tags': 'count:12',
2375 'availability': 'public',
2376 'channel': 'SciShow',
2377 'live_status': 'not_live',
2378 'duration': 248,
2379 'categories': ['Education'],
2380 'age_limit': 0,
2381 'channel_follower_count': int,
2382 'chapters': list,
2383 'uploader': 'SciShow',
2384 'uploader_url': 'https://www.youtube.com/@SciShow',
2385 'uploader_id': '@SciShow',
2386 'comment_count': int,
2387 'channel_is_verified': True,
2388 'heatmap': 'count:100',
2389 }, 'params': {'format': 'mhtml', 'skip_download': True}
2390 }, {
2391 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2392 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2393 'info_dict': {
2394 'id': '2NUZ8W2llS4',
2395 'ext': 'mp4',
2396 'title': 'The NP that test your phone performance 🙂',
2397 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2398 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2399 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2400 'duration': 21,
2401 'view_count': int,
2402 'age_limit': 0,
2403 'categories': ['Gaming'],
2404 'tags': 'count:23',
2405 'playable_in_embed': True,
2406 'live_status': 'not_live',
2407 'upload_date': '20220103',
2408 'like_count': int,
2409 'availability': 'public',
2410 'channel': 'Leon Nguyen',
2411 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2412 'comment_count': int,
2413 'channel_follower_count': int,
2414 'uploader': 'Leon Nguyen',
2415 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2416 'uploader_id': '@LeonNguyen',
2417 'heatmap': 'count:100',
2418 }
2419 }, {
2420 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2421 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2422 'info_dict': {
2423 'id': '2NUZ8W2llS4',
2424 'ext': 'mp4',
2425 'title': 'The NP that test your phone performance 🙂',
2426 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2427 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2428 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2429 'duration': 21,
2430 'view_count': int,
2431 'age_limit': 0,
2432 'categories': ['Gaming'],
2433 'tags': 'count:23',
2434 'playable_in_embed': True,
2435 'live_status': 'not_live',
2436 'upload_date': '20220102',
2437 'like_count': int,
2438 'availability': 'public',
2439 'channel': 'Leon Nguyen',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2441 'comment_count': int,
2442 'channel_follower_count': int,
2443 'uploader': 'Leon Nguyen',
2444 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2445 'uploader_id': '@LeonNguyen',
2446 'heatmap': 'count:100',
2447 },
2448 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2449 }, {
2450 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2451 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2452 'info_dict': {
2453 'id': 'mzZzzBU6lrM',
2454 'ext': 'mp4',
2455 'title': 'I Met GeorgeNotFound In Real Life...',
2456 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2457 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2458 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2459 'duration': 955,
2460 'view_count': int,
2461 'age_limit': 0,
2462 'categories': ['Entertainment'],
2463 'tags': 'count:26',
2464 'playable_in_embed': True,
2465 'live_status': 'not_live',
2466 'release_timestamp': 1641172509,
2467 'release_date': '20220103',
2468 'upload_date': '20220103',
2469 'like_count': int,
2470 'availability': 'public',
2471 'channel': 'Quackity',
2472 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2473 'channel_follower_count': int,
2474 'uploader': 'Quackity',
2475 'uploader_id': '@Quackity',
2476 'uploader_url': 'https://www.youtube.com/@Quackity',
2477 'comment_count': int,
2478 'channel_is_verified': True,
2479 'heatmap': 'count:100',
2480 }
2481 },
2482 { # continuous livestream. Microformat upload date should be preferred.
2483 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2484 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2485 'info_dict': {
2486 'id': 'kgx4WGK0oNU',
2487 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2488 'ext': 'mp4',
2489 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2490 'availability': 'public',
2491 'age_limit': 0,
2492 'release_timestamp': 1637975704,
2493 'upload_date': '20210619',
2494 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2495 'live_status': 'is_live',
2496 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2497 'channel': 'Abao in Tokyo',
2498 'channel_follower_count': int,
2499 'release_date': '20211127',
2500 'tags': 'count:39',
2501 'categories': ['People & Blogs'],
2502 'like_count': int,
2503 'view_count': int,
2504 'playable_in_embed': True,
2505 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2506 'concurrent_view_count': int,
2507 'uploader': 'Abao in Tokyo',
2508 'uploader_url': 'https://www.youtube.com/@abaointokyo',
2509 'uploader_id': '@abaointokyo',
2510 },
2511 'params': {'skip_download': True}
2512 }, {
2513 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2514 'info_dict': {
2515 'id': 'tjjjtzRLHvA',
2516 'ext': 'mp4',
2517 'title': 'ハッシュタグ無し };if window.ytcsi',
2518 'upload_date': '20220323',
2519 'like_count': int,
2520 'availability': 'unlisted',
2521 'channel': 'Lesmiscore',
2522 'thumbnail': r're:^https?://.*\.jpg',
2523 'age_limit': 0,
2524 'categories': ['Music'],
2525 'view_count': int,
2526 'description': '',
2527 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2528 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2529 'live_status': 'not_live',
2530 'playable_in_embed': True,
2531 'channel_follower_count': int,
2532 'duration': 6,
2533 'tags': [],
2534 'uploader_id': '@lesmiscore',
2535 'uploader': 'Lesmiscore',
2536 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2537 }
2538 }, {
2539 # Prefer primary title+description language metadata by default
2540 # Do not prefer translated description if primary is empty
2541 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2542 'info_dict': {
2543 'id': 'el3E4MbxRqQ',
2544 'ext': 'mp4',
2545 'title': 'dlp test video 2 - primary sv no desc',
2546 'description': '',
2547 'channel': 'cole-dlp-test-acc',
2548 'tags': [],
2549 'view_count': int,
2550 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2551 'like_count': int,
2552 'playable_in_embed': True,
2553 'availability': 'unlisted',
2554 'thumbnail': r're:^https?://.*\.jpg',
2555 'age_limit': 0,
2556 'duration': 5,
2557 'live_status': 'not_live',
2558 'upload_date': '20220908',
2559 'categories': ['People & Blogs'],
2560 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2561 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2562 'uploader_id': '@coletdjnz',
2563 'uploader': 'cole-dlp-test-acc',
2564 },
2565 'params': {'skip_download': True}
2566 }, {
2567 # Extractor argument: prefer translated title+description
2568 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2569 'info_dict': {
2570 'id': 'gHKT4uU8Zng',
2571 'ext': 'mp4',
2572 'channel': 'cole-dlp-test-acc',
2573 'tags': [],
2574 'duration': 5,
2575 'live_status': 'not_live',
2576 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2577 'upload_date': '20220728',
2578 'view_count': int,
2579 'categories': ['People & Blogs'],
2580 'thumbnail': r're:^https?://.*\.jpg',
2581 'title': 'dlp test video title translated (fr)',
2582 'availability': 'public',
2583 'age_limit': 0,
2584 'description': 'dlp test video description translated (fr)',
2585 'playable_in_embed': True,
2586 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2587 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2588 'uploader_id': '@coletdjnz',
2589 'uploader': 'cole-dlp-test-acc',
2590 },
2591 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2592 'expected_warnings': [r'Preferring "fr" translated fields'],
2593 }, {
2594 'note': '6 channel audio',
2595 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2596 'only_matching': True,
2597 }, {
2598 'note': 'Multiple HLS formats with same itag',
2599 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2600 'info_dict': {
2601 'id': 'kX3nB4PpJko',
2602 'ext': 'mp4',
2603 'categories': ['Entertainment'],
2604 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2605 'live_status': 'not_live',
2606 'duration': 937,
2607 'channel_follower_count': int,
2608 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2609 'title': 'Last To Take Hand Off Jet, Keeps It!',
2610 'channel': 'MrBeast',
2611 'playable_in_embed': True,
2612 'view_count': int,
2613 'upload_date': '20221112',
2614 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2615 'age_limit': 0,
2616 'availability': 'public',
2617 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2618 'like_count': int,
2619 'tags': [],
2620 'uploader': 'MrBeast',
2621 'uploader_url': 'https://www.youtube.com/@MrBeast',
2622 'uploader_id': '@MrBeast',
2623 'comment_count': int,
2624 'channel_is_verified': True,
2625 'heatmap': 'count:100',
2626 },
2627 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2628 }, {
2629 'note': 'Audio formats with Dynamic Range Compression',
2630 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2631 'info_dict': {
2632 'id': 'Tq92D6wQ1mg',
2633 'ext': 'webm',
2634 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2635 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2636 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2637 'channel_follower_count': int,
2638 'description': 'md5:17eccca93a786d51bc67646756894066',
2639 'upload_date': '20191228',
2640 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2641 'playable_in_embed': True,
2642 'like_count': int,
2643 'categories': ['Entertainment'],
2644 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2645 'age_limit': 18,
2646 'channel': 'Projekt Melody',
2647 'view_count': int,
2648 'availability': 'needs_auth',
2649 'comment_count': int,
2650 'live_status': 'not_live',
2651 'duration': 106,
2652 'uploader': 'Projekt Melody',
2653 'uploader_id': '@ProjektMelody',
2654 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2655 },
2656 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2657 },
2658 {
2659 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2660 'info_dict': {
2661 'id': 'qVv6vCqciTM',
2662 'ext': 'mp4',
2663 'age_limit': 0,
2664 'comment_count': int,
2665 'chapters': 'count:13',
2666 'upload_date': '20221223',
2667 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2668 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2669 'like_count': int,
2670 'release_date': '20221223',
2671 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2672 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2673 'view_count': int,
2674 'playable_in_embed': True,
2675 'duration': 4438,
2676 'availability': 'public',
2677 'channel_follower_count': int,
2678 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2679 'categories': ['Entertainment'],
2680 'live_status': 'was_live',
2681 'release_timestamp': 1671793345,
2682 'channel': 'さなちゃんねる',
2683 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2684 'uploader': 'さなちゃんねる',
2685 'uploader_url': 'https://www.youtube.com/@sana_natori',
2686 'uploader_id': '@sana_natori',
2687 'channel_is_verified': True,
2688 'heatmap': 'count:100',
2689 },
2690 },
2691 {
2692 # Fallbacks when webpage and web client is unavailable
2693 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2694 'info_dict': {
2695 'id': 'wSSmNUl9Snw',
2696 'ext': 'mp4',
2697 # 'categories': ['Science & Technology'],
2698 'view_count': int,
2699 'chapters': 'count:2',
2700 'channel': 'Scott Manley',
2701 'like_count': int,
2702 'age_limit': 0,
2703 # 'availability': 'public',
2704 'channel_follower_count': int,
2705 'live_status': 'not_live',
2706 'upload_date': '20170831',
2707 'duration': 682,
2708 'tags': 'count:8',
2709 'uploader_url': 'https://www.youtube.com/@scottmanley',
2710 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2711 'uploader': 'Scott Manley',
2712 'uploader_id': '@scottmanley',
2713 'title': 'The Computer Hack That Saved Apollo 14',
2714 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2715 'thumbnail': r're:^https?://.*\.webp',
2716 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2717 'playable_in_embed': True,
2718 'comment_count': int,
2719 'channel_is_verified': True,
2720 'heatmap': 'count:100',
2721 },
2722 'params': {
2723 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
2724 },
2725 },
2726 ]
2727
2728 _WEBPAGE_TESTS = [
2729 # YouTube <object> embed
2730 {
2731 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2732 'md5': '873c81d308b979f0e23ee7e620b312a3',
2733 'info_dict': {
2734 'id': 'msN87y-iEx0',
2735 'ext': 'mp4',
2736 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2737 'upload_date': '20080526',
2738 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2739 'age_limit': 0,
2740 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2741 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2742 'playable_in_embed': True,
2743 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2744 'like_count': int,
2745 'comment_count': int,
2746 'channel': 'Christopher Sykes',
2747 'live_status': 'not_live',
2748 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2749 'availability': 'public',
2750 'duration': 195,
2751 'view_count': int,
2752 'categories': ['Science & Technology'],
2753 'channel_follower_count': int,
2754 'uploader': 'Christopher Sykes',
2755 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2756 'uploader_id': '@ChristopherSykesDocumentaries',
2757 'heatmap': 'count:100',
2758 },
2759 'params': {
2760 'skip_download': True,
2761 }
2762 },
2763 ]
2764
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty `list` query parameter; otherwise defer to the parent check."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
2773
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initializer and start with empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code
    # _player_cache: cache id tuple -> memoized result (or exception) of `_cached` callables
    self._code_cache, self._player_cache = {}, {}
2778
2779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
2780 lock = threading.Lock()
2781 start_time = time.time()
2782 formats = [f for f in formats if f.get('is_from_start')]
2783
2784 def refetch_manifest(format_id, delay):
2785 nonlocal formats, start_time, is_live
2786 if time.time() <= start_time + delay:
2787 return
2788
2789 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2790 video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
2791 microformats = traverse_obj(
2792 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2793 expected_type=dict)
2794 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2795 is_live = live_status == 'is_live'
2796 start_time = time.time()
2797
2798 def mpd_feed(format_id, delay):
2799 """
2800 @returns (manifest_url, manifest_stream_number, is_live) or None
2801 """
2802 for retry in self.RetryManager(fatal=False):
2803 with lock:
2804 refetch_manifest(format_id, delay)
2805
2806 f = next((f for f in formats if f['format_id'] == format_id), None)
2807 if not f:
2808 if not is_live:
2809 retry.error = f'{video_id}: Video is no longer live'
2810 else:
2811 retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
2812 continue
2813 return f['manifest_url'], f['manifest_stream_number'], is_live
2814 return None
2815
2816 for f in formats:
2817 f['is_live'] = is_live
2818 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
2819 live_start_time, mpd_feed, not is_live and f.copy())
2820 if is_live:
2821 f['fragments'] = gen
2822 f['protocol'] = 'http_dash_segments_generator'
2823 else:
2824 f['fragments'] = LazyList(gen({}))
2825 del f['is_from_start']
2826
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment descriptors for a live DASH format.

    @param video_id               used for the debug message only
    @param format_id              passed to `mpd_feed` to select the format
    @param live_start_time        stream start time (falsy = unknown); compared with the download start time
    @param mpd_feed               callable(format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  if truthy, used as the format info instead of downloading the
                                  manifest, and the generator stops after the first pass
    @param ctx                    dict; `start` (optional) overrides the download start time and
                                  `last_error` (optional) is popped on every manifest refresh
    Yields dicts with the keys `url` and `fragment_count`.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The helper below reads `last_seq` through its closure. Bind it up front so that a
    # failed manifest download on the very first pass retries instead of raising NameError.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest URL and, when needed, the fragment list.

        @returns (should_continue, last sequence number)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many passes in a row produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught by the handler below; used only to restart the outer loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        # Pace the polling so that passes start at least FETCH_SPAN seconds apart
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2937
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfg objects.

    `PLAYER_JS_URL` is tried first, then `jsUrl` under `WEB_PLAYER_CONTEXT_CONFIGS`.
    Returns None when neither is present. `webpage` is accepted but not used here.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_url) if js_url else None
2945
2946 def _download_player_url(self, video_id, fatal=False):
2947 res = self._download_webpage(
2948 'https://www.youtube.com/iframe_api',
2949 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2950 if res:
2951 player_version = self._search_regex(
2952 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2953 if player_version:
2954 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2955
2956 def _signature_cache_id(self, example_sig):
2957 """ Return a string representation of a signature """
2958 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2959
2960 @classmethod
2961 def _extract_player_info(cls, player_url):
2962 for player_re in cls._PLAYER_INFO_RE:
2963 id_m = re.search(player_re, player_url)
2964 if id_m:
2965 break
2966 else:
2967 raise ExtractorError('Cannot identify player %r' % player_url)
2968 return id_m.group('id')
2969
2970 def _load_player(self, video_id, player_url, fatal=True):
2971 player_id = self._extract_player_info(player_url)
2972 if player_id not in self._code_cache:
2973 code = self._download_webpage(
2974 player_url, video_id, fatal=fatal,
2975 note='Downloading player ' + player_id,
2976 errnote='Download of %s failed' % player_url)
2977 if code:
2978 self._code_cache[player_id] = code
2979 return self._code_cache.get(player_id)
2980
2981 def _extract_signature_function(self, video_id, player_url, example_sig):
2982 player_id = self._extract_player_info(player_url)
2983
2984 # Read from filesystem cache
2985 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2986 assert os.path.basename(func_id) == func_id
2987
2988 self.write_debug(f'Extracting signature function {func_id}')
2989 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
2990
2991 if not cache_spec:
2992 code = self._load_player(video_id, player_url)
2993 if code:
2994 res = self._parse_sig_js(code)
2995 test_string = ''.join(map(chr, range(len(example_sig))))
2996 cache_spec = [ord(c) for c in res(test_string)]
2997 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
2998
2999 return lambda s: ''.join(s[i] for i in cache_spec)
3000
3001 def _print_sig_code(self, func, example_sig):
3002 if not self.get_param('youtube_print_sig_code'):
3003 return
3004
3005 def gen_sig_code(idxs):
3006 def _genslice(start, end, step):
3007 starts = '' if start == 0 else str(start)
3008 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3009 steps = '' if step == 1 else (':%d' % step)
3010 return f's[{starts}{ends}{steps}]'
3011
3012 step = None
3013 # Quelch pyflakes warnings - start will be set when step is set
3014 start = '(Never used)'
3015 for i, prev in zip(idxs[1:], idxs[:-1]):
3016 if step is not None:
3017 if i - prev == step:
3018 continue
3019 yield _genslice(start, prev, step)
3020 step = None
3021 continue
3022 if i - prev in [-1, 1]:
3023 step = i - prev
3024 start = prev
3025 continue
3026 else:
3027 yield 's[%d]' % prev
3028 if step is None:
3029 yield 's[%d]' % i
3030 else:
3031 yield _genslice(start, i, step)
3032
3033 test_string = ''.join(map(chr, range(len(example_sig))))
3034 cache_res = func(test_string)
3035 cache_spec = [ord(c) for c in cache_res]
3036 expr_code = ' + '.join(gen_sig_code(cache_spec))
3037 signature_id_tuple = '(%s)' % (
3038 ', '.join(str(len(p)) for p in example_sig.split('.')))
3039 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
3040 ' return %s\n') % (signature_id_tuple, expr_code)
3041 self.to_screen('Extracted signature function:\n' + code)
3042
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
3063
3064 def _cached(self, func, *cache_id):
3065 def inner(*args, **kwargs):
3066 if cache_id not in self._player_cache:
3067 try:
3068 self._player_cache[cache_id] = func(*args, **kwargs)
3069 except ExtractorError as e:
3070 self._player_cache[cache_id] = e
3071 except Exception as e:
3072 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3073
3074 ret = self._player_cache[cache_id]
3075 if isinstance(ret, Exception):
3076 raise ret
3077 return ret
3078 return inner
3079
3080 def _decrypt_signature(self, s, video_id, player_url):
3081 """Turn the encrypted s field into a working signature"""
3082 extract_sig = self._cached(
3083 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3084 func = extract_sig(video_id, player_url, s)
3085 self._print_sig_code(func, s)
3086 return func(s)
3087
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extraction_error:
        raise ExtractorError('Unable to extract nsig function code', cause=extraction_error)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The interpreted function is memoized per player URL
        cached_builder = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = cached_builder(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as native_error:
        # Fall back to PhantomJS; if that cannot be set up, surface the native failure
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3121
3122 def _extract_n_function_name(self, jscode):
3123 funcname, idx = self._search_regex(
3124 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3125 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3126 if not idx:
3127 return funcname
3128
3129 return json.loads(js_to_json(self._search_regex(
3130 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
3131 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3132
def _extract_n_function_code(self, video_id, player_url):
    """
    Return `(jsi, player_id, func_code)` for the player's n-parameter function.

    `func_code` is taken from the `youtube-nsig` cache when present. Otherwise
    it is extracted from the downloaded player code, first with a strict regex
    and then with the JS interpreter, and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which is a regex metacharacter
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
        # NB: The end of the regex is intentionally kept strict
        {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalize to (argument names, body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3158
3159 def _extract_n_function_from_code(self, jsi, func_code):
3160 func = jsi.extract_function_from_code(*func_code)
3161
3162 def extract_nsig(s):
3163 try:
3164 ret = func([s])
3165 except JSInterpreter.Exception:
3166 raise
3167 except Exception as e:
3168 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3169
3170 if ret.startswith('enhanced_except_'):
3171 raise JSInterpreter.Exception('Signature function returned an exception')
3172 return ret
3173
3174 return extract_nsig
3175
3176 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3177 """
3178 Extract signatureTimestamp (sts)
3179 Required to tell API what sig/player version is in use.
3180 """
3181 sts = None
3182 if isinstance(ytcfg, dict):
3183 sts = int_or_none(ytcfg.get('STS'))
3184
3185 if not sts:
3186 # Attempt to extract from player
3187 if player_url is None:
3188 error_msg = 'Cannot extract signature timestamp without player_url.'
3189 if fatal:
3190 raise ExtractorError(error_msg)
3191 self.report_warning(error_msg)
3192 return
3193 code = self._load_player(video_id, player_url, fatal=fatal)
3194 if code:
3195 sts = int_or_none(self._search_regex(
3196 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3197 'JS player signature timestamp', group='sts', fatal=fatal))
3198 return sts
3199
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs so that the video is marked as watched.

    The playback URL is requested first and the watchtime URL second. If either
    URL is missing from the player responses, a warning is reported and the
    method stops. Failed requests are not fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # Draw uniformly from the alphabet. The previous `randint(0, 256) & 63`
        # had an inclusive upper bound, which made the first character twice as likely.
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        # doseq=True so that the list values are expanded into repeated parameters
        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3240
3241 @classmethod
3242 def _extract_from_webpage(cls, url, webpage):
3243 # Invidious Instances
3244 # https://github.com/yt-dlp/yt-dlp/issues/195
3245 # https://github.com/iv-org/invidious/pull/1730
3246 mobj = re.search(
3247 r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
3248 webpage)
3249 if mobj:
3250 yield cls.url_result(mobj.group('url'), cls)
3251 raise cls.StopExtraction()
3252
3253 yield from super()._extract_from_webpage(url, webpage)
3254
3255 # lazyYT YouTube embed
3256 for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
3257 yield cls.url_result(unescapeHTML(id_), cls, id_)
3258
3259 # Wordpress "YouTube Video Importer" plugin
3260 for m in re.findall(r'''(?x)<div[^>]+
3261 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
3262 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
3263 yield cls.url_result(m[-1], cls, m[-1])
3264
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`; raise ExtractorError when none can be determined."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3271
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in `data`."""
    def chapter_start(chapter):
        # Start time is given in milliseconds
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3286
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro markers lists of the engagement panels in `data`.

    Returns the chapters of the first list that yields any, or an empty list.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    for contents in content_list:
        marker_items = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(
            marker_items, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3299
def _extract_heatmap(self, data):
    """Return the heatmap markers found in ``data`` or None.

    Each marker of the mutation whose marker type is ``MARKER_TYPE_HEATMAP``
    becomes a dict with ``start_time`` and ``end_time`` (milliseconds scaled
    to seconds) and ``value`` (``intensityScoreNormalized`` as float).
    """
    def is_heatmap_mutation(_, mutation):
        marker_type = mutation['payload']['macroMarkersListEntity']['markersList']['markerType']
        return marker_type == 'MARKER_TYPE_HEATMAP'

    def end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations', is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    # An empty result is reported as None rather than as an empty list
    return heatmap or None
3309
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer carries no ``commentId``. ``parent`` is
    stored as the comment's ``parent`` field, falling back to ``'root'``.
    The optional keys ``author_is_uploader``, ``is_favorited``,
    ``author_is_verified`` and ``is_pinned`` are only set when the renderer
    provides the corresponding information.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(
            comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(
            comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    published_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: '_time_text' is non-standard, but we need a way of showing that
    # the timestamp is an estimate.
    info['_time_text'] = published_text
    info['timestamp'] = self._parse_time_text(published_text)

    # The channel URL is taken from the first of the two endpoint locations that has one
    author_path = traverse_obj(
        comment_renderer,
        ('authorEndpoint', (
            ('browseEndpoint', 'canonicalBaseUrl'),
            ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    channel_owner_flag = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if channel_owner_flag is not None:
        info['author_is_uploader'] = channel_owner_flag

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
3358
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    Called without ``parent`` it walks the top-level comment section; for
    every comment that has replies it recurses with ``parent`` set to that
    comment's id. ``tracker`` is the dict of running counters and seen ids
    that is shared across those recursive calls (created on the first call).

    @param root_continuation_data  renderer the first continuation is taken from
    @param ytcfg                   passed on to the API header/response helpers
    @param video_id                used to build a continuation when none is found
    @param parent                  id of the parent comment, None for the top level
    @param tracker                 shared state dict, see above

    Raises ``self.CommentsDisabled`` when nothing was extracted at the top
    level and ``root_continuation_data`` carries a message renderer.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order (if any) and
        # records the estimated comment count in the tracker
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields comments (and, recursively, their replies). A bare `yield`
        # (None) is the signal for the caller to stop extraction altogether.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_id = comment['id']
            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                # The optional part carries its own trailing space so that the
                # top-level message does not end up with a double space
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread " if parent else ""}as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Replies are capped both per thread and in total
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=None,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set(),
            pinned_comment_ids=set()
        )

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The max_comments extractor argument holds up to four limits; missing or
    # non-numeric ones default to sys.maxsize. The first value (max_comments)
    # is not used in this method.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only provides the header, i.e. the
                # continuation for the requested sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread asked us to stop (limit reached or looping detected)
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3542
3543 @staticmethod
3544 def _generate_comment_continuation(video_id):
3545 """
3546 Generates initial comment section continuation token from given video id
3547 """
3548 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3549 return base64.b64encode(token.encode()).decode()
3550
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the item section whose
    ``sectionIdentifier`` is ``comment-item-section``, truncated to the first
    value of the ``max_comments`` extractor argument when that is numeric.
    """
    def iter_section_comments():
        # The section lookup is deferred until the iterator is first consumed
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_section_comments(), comment_limit)
3561
3562 @staticmethod
3563 def _get_checkok_params():
3564 return {'contentCheckOk': True, 'racyCheckOk': True}
3565
3566 @classmethod
3567 def _generate_player_context(cls, sts=None):
3568 context = {
3569 'html5Preference': 'HTML5_PREF_WANTS',
3570 }
3571 if sts is not None:
3572 context['signatureTimestamp'] = sts
3573 return {
3574 'playbackContext': {
3575 'contentPlaybackContext': context
3576 },
3577 **cls._get_checkok_params()
3578 }
3579
@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` indicates an age-gated video.

    True when the playability status carries a ``desktopLegacyAgeGateReason``
    or when its ``status``/``reason`` contains one of the known age-gate
    markers. The markers are matched case-insensitively: the status markers
    are spelled in lower case here, while this file compares status values in
    upper case elsewhere (e.g. ``'UNPLAYABLE'``).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        # Non-string values cannot be searched for a marker
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3591
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of ``player_response`` is ``UNPLAYABLE``."""
    playability = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability == 'UNPLAYABLE'
3595
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player response for ``video_id`` from the ``player`` endpoint as ``client``.

    A signature timestamp is only looked up when ``player_url`` is given.
    The ``player_params`` extractor argument, when set, overrides the
    ``params`` value that is otherwise sent for the android clients.
    Returns the response, or None when it is falsy. ``smuggled_data`` is not
    used here.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    signature_timestamp = None
    if player_url:
        signature_timestamp = self._extract_signature_timestamp(
            video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    player_query = {'videoId': video_id}
    base_client = _split_innertube_client(client)[0]
    if base_client in ('android', 'android_embedscreen'):
        player_query['params'] = 'CgIIAQ=='

    user_params = self._configuration_arg('player_params', [None], casesense=True)[0]
    if user_params:
        player_query['params'] = user_params

    player_query.update(self._generate_player_context(signature_timestamp))

    readable_client = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note=f'Downloading {readable_client} player API JSON')
    return player_response or None
3621
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument into an ordered list of client names.

    Besides client names, the argument accepts ``default`` (the built-in
    default clients) and ``all`` (every client not starting with ``_``,
    sorted by descending ``priority``). Unknown names are skipped with a
    warning; when nothing usable was requested the defaults are used. For
    music URLs the ``<client>_music`` variant of each selected client is
    appended when such a client exists. Duplicates are removed by
    ``orderedSet``.
    """
    requested_clients = []
    default = ['ios', 'android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the extension below never mutates the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates the very same list would also visit the appended names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3645
def _invalid_player_response(self, pr, video_id):
    """Return the video id reported by ``pr`` when it differs from ``video_id``, else None.

    YouTube may return a different video player response than expected.
    See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    """
    reported_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return None if reported_id == video_id else reported_id
3651
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of all requested (and fallback) clients.

    Returns a tuple ``(prs, player_url)`` where ``prs`` is the list of
    accepted player responses and ``player_url`` the player URL that was
    determined along the way (None if there is none).

    Raises ExtractorError when no player response could be collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that pop() below processes the clients in the requested
    # order; clients appended by append_client() are processed next
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    player_url = None
    # Maps client name -> video id of the (wrong) player response it returned
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # The first player URL that is found is reused for all later clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response embedded in the webpage is reused when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
3735
3736 def _needs_live_processing(self, live_status, duration):
3737 if (live_status == 'is_live' and self.get_param('live_from_start')
3738 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3739 return live_status
3740
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generator yielding the formats found in ``streaming_data`` and, last, the subtitles.

    First the direct ("https") formats listed under ``formats`` and
    ``adaptiveFormats`` are yielded, then the formats of the HLS and DASH
    manifests unless those are skipped. The final item yielded is the dict
    of subtitles collected from the manifests, not a format.

    @param streaming_data  iterable of streamingData dicts, one per client
    @param player_url      needed to decrypt signatures and the "n" parameter
    @param live_status     live status string, used to decide on manifests
    @param duration        video duration; used to spot damaged formats
    """
    CHUNK_SIZE = 10 << 20
    # itags: itag -> set of (protocol, language) already emitted for it
    # stream_ids: (itag, audio track id, isDrc) tuples already emitted
    itags, stream_ids = collections.defaultdict(set), []
    # Quality names seen per itag / per height; used for the manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # One fragment per CHUNK_SIZE byte range of the file
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an audioQuality key that is present but null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a plain URL the format needs its signature decrypted
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and deprioritized below
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # quality is None when the format provides neither quality nor audioQuality
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        # "dashy" variant: the same file, downloaded as byte-range fragments
        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Adjusts a manifest format in place; returns False when the format
        # duplicates one that was already emitted and must be dropped
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        # The protocol is appended to the id when it is needed to tell formats apart
        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are the last item yielded, after all formats
    yield subtitles
3992
def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one ``mhtml`` storyboard format per storyboard level.

    The storyboard spec is a ``|``-separated string: a base URL followed by
    one entry per level. Each entry is ``#``-separated and must have eight
    fields, the first five being width, height, frame count, columns and
    rows, the last two the values for ``$N`` and ``sigh`` in the URL.
    Malformed entries are skipped with a warning.

    Nothing is yielded when there is no usable base URL, or when
    ``duration`` is unknown or zero, since both the fps and the fragment
    durations are computed by dividing by it.
    """
    # Reversed so that the base URL (the first spec item) can be popped off the end
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        self.write_debug('Cannot extract storyboards without a known video duration', only_once=True)
        return
    last_level = len(spec) - 1
    for i, level_spec in enumerate(spec):
        fields = level_spec.split('#')
        counts = list(map(int_or_none, fields[:5]))
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_template, sigh = fields[6:]

        # spec is reversed, so `last_level - i` is the level's position in the original spec
        url = base_url.replace('$L', str(last_level - i)).replace('$N', name_template) + f'&sigh={sigh}'
        # Every fragment is one image sheet of cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
4030
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns a 4-tuple ``(webpage, master_ytcfg, player_responses, player_url)``.
    ``webpage`` stays None when 'webpage' is listed in the ``player_skip``
    extractor argument; the download itself is non-fatal.
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        watch_query = {'bpctr': '9999999999', 'has_verified': '1'}
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            watch_query['pp'] = player_params
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=watch_query)

    # Fall back to the built-in default config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
4048
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and collect formats/subtitles from the streaming data.

    Returns a 5-tuple
    ``(live_broadcast_details, live_status, streaming_data, formats, subtitles)``.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching flag wins, in this order of precedence
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The helper yields every format and, as its very last item, the subtitles
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    every_format_flagged = all(fmt.get('has_drm') for fmt in formats)
    if every_format_flagged:
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
4071
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Rough order of work:
      1. download the watch page / player responses
      2. short-circuit to a trailer or a multifeed playlist when applicable
      3. list formats, raising a descriptive error when there are none
      4. collect thumbnails, subtitles, music metadata from the description
      5. read the "initial data" for counts, chapters, channel and badge info

    Returns the info dict, or a url/playlist result for trailers and
    multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(
        url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    # join_nonempty copes with a missing video title (None)
                    title = join_nonempty(title, f'({feed_title})', delim=' ')
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # The primary reason may be missing; avoid "None + str"
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails actually reported, before guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names rank higher; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for throttled/potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            # Check for a usable URL before trying to parse it
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Pick up start/end times from the URL; the fragment takes priority over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: The pattern is verbose (?x), so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artists': ([a] if (a := mobj.group('clean_artist'))
                            else [a.strip() for a in mobj.group('artist').split('·')]),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap(initial_data)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                # Two known label layouts; the first one that matches wins
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break

        info['like_count'] = traverse_obj(vpir, (
            'videoActions', 'menuRenderer', 'topLevelButtons', ...,
            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
            'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)

        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line marks several songs; per-song fields are then skipped below
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artists'] = [mrr_contents_text] if mrr_contents_text else None
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
    info['upload_date'] = upload_date

    if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
        # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
        upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
        if upload_datetime >= datetime_from_str('today-2days'):
            for fmt in info['formats']:
                if fmt.get('protocol') == 'm3u8_native':
                    fmt['__needs_testing'] = True

    # Mirror the music fields under their alternative names when they are set
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4607
4608
4609 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled data into the URL results of *func*.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with
    the data un-smuggled from the incoming URL. Whatever is left in
    ``smuggled_data`` afterwards is smuggled into the URL of the returned
    result and of each of its entries, if they are of type ``url`` or
    ``url_transparent``.
    """
    rewritable_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in rewritable_hosts:
                    # The host itself now carries the flag, so drop it from the data
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        entries = info_dict.get('entries')
        if entries:
            # Lazily processed; each entry works on its own copy since _smuggle may pop keys
            info_dict['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in entries)
        return info_dict

    return wrapper
4636
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first recognised renderer dict contained in *item*, else None.

    A value qualifies when it is a dict stored under one of the known
    renderer keys, or under any key of the form ``grid...Renderer``.
    """
    # Modified from _extract_grid_item_renderer
    recognised = {
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
    }
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        is_grid_renderer = name.startswith('grid') and name.endswith('Renderer')
        if name in recognised or is_grid_renderer:
            return candidate
    return None
4650
def _extract_channel_renderer(self, renderer):
    """Convert a channel renderer into a ``url`` result for the channel page.

    ``renderer['channelId']`` is required (a missing key raises KeyError);
    every other field is looked up leniently.
    """
    ucid = self.ucid_or_none(renderer['channelId'])
    name = self._get_text(renderer, 'title')
    page_url = format_field(ucid, None, 'https://www.youtube.com/channel/%s', default=None)

    handle_url_paths = (
        ('commandMetadata', 'webCommandMetadata', 'url'),
        ('browseEndpoint', 'canonicalBaseUrl'),
    )
    handle = self.handle_from_url(traverse_obj(
        renderer, ('navigationEndpoint', handle_url_paths, {str}), get_all=False))
    if not handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    handle_page_url = format_field(handle, None, 'https://www.youtube.com/%s', default=None)

    # In search results YouTube puts the subscriber text into videoCountText
    # (see the handle fallback above), whereas feed/channels fills
    # subscriberCountText correctly. Hence both keys are tried in order.
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so it is only trusted
    # when subscriberCountText itself parses as a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    description = self._get_text(renderer, 'descriptionSnippet')
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': page_url,
        'id': ucid,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': name,
        'uploader': name,
        'channel_id': ucid,
        'channel_url': page_url,
        'title': name,
        'uploader_id': handle,
        'uploader_url': handle_page_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': is_verified,
    }
4688
def _grid_entries(self, grid_renderer):
    """Yield a result for each usable item of ``grid_renderer['items']``.

    Each item is classified, in this order, as a playlist, a video, a channel
    or a generic endpoint URL handled by one of the YouTube extractors.
    Items that fit none of these are silently dropped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if playlist_id := renderer.get('playlistId'):
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
4726
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a URL result on music.youtube.com.

    Checked in order: a playlist item video, a watch endpoint carrying a
    playlist (optionally with a video), and a browse endpoint.
    Returns None when the renderer has none of these.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        item_title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id, title=item_title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4747
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in a shelf renderer's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` contents are
    processed; anything else yields nothing.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            yield from self._grid_entries(grid)
        # TODO: 'horizontalListRenderer' content is not handled yet
4762
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url_result for the shelf's own endpoint URL (when it has one),
    followed by the entries embedded in the shelf content.

    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda r: r['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not carry a URL, so the embedded content is extracted as well
    yield from self._shelf_entries_from_content(shelf_renderer)
4778
4779 def _playlist_entries(self, video_list_renderer):
4780 for content in video_list_renderer['contents']:
4781 if not isinstance(content, dict):
4782 continue
4783 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4784 if not isinstance(renderer, dict):
4785 continue
4786 video_id = renderer.get('videoId')
4787 if not video_id:
4788 continue
4789 yield self._extract_video(renderer)
4790
def _rich_entries(self, rich_grid_renderer):
    """
    Yield at most one entry for a rich item: the extracted video when the
    inner renderer has a 'videoId', otherwise a playlist url_result when it
    has a 'playlistId'.
    """
    inner_keys = ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')
    item = traverse_obj(rich_grid_renderer, ('content', inner_keys), get_all=False) or {}

    if item.get('videoId'):
        yield self._extract_video(item)
    else:
        playlist_id = item.get('playlistId')
        if playlist_id:
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=self._get_text(item, 'title'))
4806
4807 def _video_entry(self, video_renderer):
4808 video_id = video_renderer.get('videoId')
4809 if video_id:
4810 return self._extract_video(video_renderer)
4811
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab url_result for a hashtag tile, or None if the tile has no usable URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer,
        ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    hashtag_url = urljoin('https://youtube.com', tap_url)
    if not hashtag_url:
        return None
    return self.url_result(
        hashtag_url, ie=YoutubeTabIE.ie_key(),
        title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4818
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a backstage (community) post, in order:
    the attached video, the attached playlist, then every YouTube video
    linked from the post text that is not the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # Do not repeat the video that is already attached to the post
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4854
4855 def _post_thread_continuation_entries(self, post_thread_continuation):
4856 contents = post_thread_continuation.get('contents')
4857 if not isinstance(contents, list):
4858 return
4859 for content in contents:
4860 renderer = content.get('backstagePostThreadRenderer')
4861 if isinstance(renderer, dict):
4862 yield from self._post_thread_entries(renderer)
4863 continue
4864 renderer = content.get('videoRenderer')
4865 if isinstance(renderer, dict):
4866 yield self._video_entry(renderer)
4867
4868 r''' # unused
4869 def _rich_grid_entries(self, contents):
4870 for content in contents:
4871 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4872 if video_renderer:
4873 entry = self._video_entry(video_renderer)
4874 if entry:
4875 yield entry
4876 '''
4877
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url_result for every link found in the report history table rows."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        absolute_url = urljoin('https://www.youtube.com', relative_url)
        yield self.url_result(absolute_url, YoutubeIE)
4884
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found under parent_renderer['contents'].

    continuation_list is an output parameter: it is reset to [None] when
    iteration starts, and its first element is overwritten with whatever
    self._extract_continuation returns for the renderers that were visited.

    @param parent_renderer: renderer whose 'contents' list is walked
    @param continuation_list: list that is mutated in place (see above)
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section-like renderers present in this item, if any
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Items that are not wrapped in a section-like renderer
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries.
            # Handlers that produce a single entry are wrapped in a list.
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                # Recurses with the same continuation_list, which the nested call resets and refills
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Handlers may return None for items they cannot use
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to a continuation on the section-like renderer itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: a continuation on the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4938
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting continuation pages and
    yielding their entries until no continuation is left.

    @param tab: tab renderer; its 'content' holds the first page
    @param item_id: id used to label each continuation request
    @param ytcfg: ytcfg passed to header generation and API requests
    @param account_syncid: passed to generate_api_headers
    @param visitor_data: passed to generate_api_headers; replaced by the
                         value found in a response when there is one
    """
    continuation_list = [None]
    # The lambda looks up `continuation_list` when it is called, so it always
    # fills whichever list the name is bound to at that moment (it is rebound below)
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        # A token that was already requested means the feed is looping
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Key found in the continuation -> (handler, key to wrap the items under).
        # When the wrapping key is None the continuation is handed to the handler as is.
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The keys of this dict decide which handler runs: the first item when
        # continuation_items can be indexed with 0, otherwise continuation_items itself
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list, so a continuation left over from an earlier page is never reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this page: stop paging
        if not video_items_renderer:
            break
5006
5007 @staticmethod
5008 def _extract_selected_tab(tabs, fatal=True):
5009 for tab_renderer in tabs:
5010 if tab_renderer.get('selected'):
5011 return tab_renderer
5012 if fatal:
5013 raise ExtractorError('Unable to find selected tab')
5014
@staticmethod
def _extract_tab_renderers(response):
    """Collect the 'tabRenderer' / 'expandableTabRenderer' dicts of a two-column browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
5019
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab.

    The playlist title is the metadata title followed by the selected tab's
    'title' and 'expandedText' (each appended as ' - <text>' via format_field).
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(
        selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
5033
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Build the playlist-level info dict (id, title, channel/uploader fields,
    thumbnails, counts, availability, ...) from a tab response.

    @param item_id: id used unless a channel id is found in the channel metadata
    @param data: response
    @returns: dict of metadata fields
    """
    info = {'id': item_id}

    # Channel metadata takes precedence; playlist metadata is only read when it is absent
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text, then to the id
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        # 'keywords' is a single string that is split with shell-like quoting rules
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True
    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # View count: playlist stats first, then the playlist header, then the channel "about" metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # Without a channel id from the channel metadata, take the owner from the playlist header or sidebar
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # For 'by X and N others' only X is kept as the channel name
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
        })

    # Derived fields are computed last so they reflect whichever branch set the ids above
    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
5156
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline playlist, requesting further pages from the
    'next' endpoint until a page brings no new videos.

    @param playlist: playlist renderer whose 'contents' hold the video renderers
    @param playlist_id: id sent as 'playlistId' with every page request
    @param data: initial response, used for the account sync id and visitor data
    @param ytcfg: ytcfg used for header generation and API requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded,
        # if it shows up again in this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer or may lack a
        # watchEndpoint; use an empty dict so the fallbacks in the query apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response carries no playlist panel: nothing left to page through
            return
5187
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist found on a watch page.

    When the playlist has its own page URL that differs from `url` and its id
    is not a known-unviewable one, a url_result pointing at that page is
    returned; otherwise the playlist is extracted inline.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
5210
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public' when a badge, the header privacy value or the selected
              privacy icon says so; otherwise the result of self._availability
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _explicit_state(header_value, icon_value, fallback=None):
        # The first source that reports anything decides: header privacy,
        # then the selected privacy icon, then the given fallback
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # A conditional expression binds looser than `or`, so writing
    # `badge or header == X if header is not None else ...` drops the badge
    # check whenever the header value is missing. The badge is therefore
    # combined with the explicit state here, outside of any conditional.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _explicit_state('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _explicit_state('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
5251
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value (of `expected_type`) among
    the playlist sidebar items of `data`, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
5260
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload a playlist so that unavailable videos (e.g. private videos,
    region blocked, etc.) are included.

    Returns None without any request when `data` has neither playlist
    metadata nor a playlist header.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
5280
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
5284
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, going through
    self.RetryManager for the attempts.

    @param url: page to download
    @param item_id: id used for the download and for data extraction
    @param fatal: passed to the retry manager, to data extraction and to
                  self._error_or_warning
    @returns: (webpage, data); data is None when no usable data was obtained
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors other than HTTP 403/429 are recorded on the retry
            # object and the loop moves on; every other error is reported and ends the loop
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        # Alerts found in the data are reported and end the loop without another attempt
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            # Discard the incomplete data so it is not returned if this was the last attempt
            data = None
            continue

    return webpage, data
5313
5314 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5315 """Use if failed to extract ytcfg (and data) from initial webpage"""
5316 if not ytcfg and self.is_authenticated:
5317 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5318 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5319 raise ExtractorError(
5320 f'{msg}. If you are not downloading private content, or '
5321 'your cookies are only for the first account and channel,'
5322 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5323 expected=True)
5324 self.report_warning(msg, only_once=True)
5325
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for a tab/playlist URL.

    The webpage is tried first (unless skipped); when it gives no data, the
    URL is resolved through the API instead.

    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    api_data = self._extract_tab_endpoint(
        url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
5344
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch
    the response of the endpoint it resolves to ('browse' or 'next').

    When the URL resolves to neither, raises ExtractorError if fatal is true,
    otherwise reports a warning and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Resolved endpoint key -> API endpoint to query, tried in this order
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
    return None
5362
# Value used for `params` by _search_results when the caller does not pass one;
# a falsy value means no 'params' key is added to the search query
_SEARCH_PARAMS = None
5364
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, page after page, until a page
    leaves no continuation.

    @param params: value sent as 'params' with the search request; defaults
                   to self._SEARCH_PARAMS and is omitted when falsy
    @param default_client: client used for ytcfg, headers and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Top-level keys of the paths above, without duplicates
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # The continuation left by the previous page (if any) is merged into the request
        payload.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        page_contents = traverse_obj(search, *content_keys)
        wrapped_page = {'contents': list(variadic(page_contents))}
        yield from self._extract_entries(wrapped_page, continuation_list)
        if not continuation_list[0]:
            break
5397
5398
5399 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5400 IE_DESC = 'YouTube Tabs'
5401 _VALID_URL = r'''(?x:
5402 https?://
5403 (?!consent\.)(?:\w+\.)?
5404 (?:
5405 youtube(?:kids)?\.com|
5406 %(invidious)s
5407 )/
5408 (?:
5409 (?P<channel_type>channel|c|user|browse)/|
5410 (?P<not_channel>
5411 feed/|hashtag/|
5412 (?:playlist|watch)\?.*?\blist=
5413 )|
5414 (?!(?:%(reserved_names)s)\b) # Direct URLs
5415 )
5416 (?P<id>[^/?\#&]+)
5417 )''' % {
5418 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5419 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5420 }
5421 IE_NAME = 'youtube:tab'
5422
5423 _TESTS = [{
5424 'note': 'playlists, multipage',
5425 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5426 'playlist_mincount': 94,
5427 'info_dict': {
5428 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5429 'title': 'Igor Kleiner Ph.D. - Playlists',
5430 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5431 'uploader': 'Igor Kleiner Ph.D.',
5432 'uploader_id': '@IgorDataScience',
5433 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5434 'channel': 'Igor Kleiner Ph.D.',
5435 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5436 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5437 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5438 'channel_follower_count': int
5439 },
5440 }, {
5441 'note': 'playlists, multipage, different order',
5442 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5443 'playlist_mincount': 94,
5444 'info_dict': {
5445 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5446 'title': 'Igor Kleiner Ph.D. - Playlists',
5447 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5448 'uploader': 'Igor Kleiner Ph.D.',
5449 'uploader_id': '@IgorDataScience',
5450 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5451 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5452 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5453 'channel': 'Igor Kleiner Ph.D.',
5454 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5455 'channel_follower_count': int
5456 },
5457 }, {
5458 'note': 'playlists, series',
5459 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5460 'playlist_mincount': 5,
5461 'info_dict': {
5462 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5463 'title': '3Blue1Brown - Playlists',
5464 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5465 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5466 'channel': '3Blue1Brown',
5467 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5468 'uploader_id': '@3blue1brown',
5469 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5470 'uploader': '3Blue1Brown',
5471 'tags': ['Mathematics'],
5472 'channel_follower_count': int,
5473 'channel_is_verified': True,
5474 },
5475 }, {
5476 'note': 'playlists, singlepage',
5477 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5478 'playlist_mincount': 4,
5479 'info_dict': {
5480 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5481 'title': 'ThirstForScience - Playlists',
5482 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5483 'uploader': 'ThirstForScience',
5484 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5485 'uploader_id': '@ThirstForScience',
5486 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5487 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5488 'tags': 'count:12',
5489 'channel': 'ThirstForScience',
5490 'channel_follower_count': int
5491 }
5492 }, {
5493 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5494 'only_matching': True,
5495 }, {
5496 'note': 'basic, single video playlist',
5497 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5498 'info_dict': {
5499 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5500 'title': 'youtube-dl public playlist',
5501 'description': '',
5502 'tags': [],
5503 'view_count': int,
5504 'modified_date': '20201130',
5505 'channel': 'Sergey M.',
5506 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5507 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5508 'availability': 'public',
5509 'uploader': 'Sergey M.',
5510 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5511 'uploader_id': '@sergeym.6173',
5512 },
5513 'playlist_count': 1,
5514 }, {
5515 'note': 'empty playlist',
5516 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5517 'info_dict': {
5518 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5519 'title': 'youtube-dl empty playlist',
5520 'tags': [],
5521 'channel': 'Sergey M.',
5522 'description': '',
5523 'modified_date': '20230921',
5524 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5525 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5526 'availability': 'unlisted',
5527 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5528 'uploader_id': '@sergeym.6173',
5529 'uploader': 'Sergey M.',
5530 },
5531 'playlist_count': 0,
5532 }, {
5533 'note': 'Home tab',
5534 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5535 'info_dict': {
5536 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5537 'title': 'lex will - Home',
5538 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5539 'uploader': 'lex will',
5540 'uploader_id': '@lexwill718',
5541 'channel': 'lex will',
5542 'tags': ['bible', 'history', 'prophesy'],
5543 'uploader_url': 'https://www.youtube.com/@lexwill718',
5544 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5545 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5546 'channel_follower_count': int
5547 },
5548 'playlist_mincount': 2,
5549 }, {
5550 'note': 'Videos tab',
5551 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5552 'info_dict': {
5553 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5554 'title': 'lex will - Videos',
5555 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5556 'uploader': 'lex will',
5557 'uploader_id': '@lexwill718',
5558 'tags': ['bible', 'history', 'prophesy'],
5559 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5560 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5561 'uploader_url': 'https://www.youtube.com/@lexwill718',
5562 'channel': 'lex will',
5563 'channel_follower_count': int
5564 },
5565 'playlist_mincount': 975,
5566 }, {
5567 'note': 'Videos tab, sorted by popular',
5568 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5569 'info_dict': {
5570 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5571 'title': 'lex will - Videos',
5572 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5573 'uploader': 'lex will',
5574 'uploader_id': '@lexwill718',
5575 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5576 'uploader_url': 'https://www.youtube.com/@lexwill718',
5577 'channel': 'lex will',
5578 'tags': ['bible', 'history', 'prophesy'],
5579 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5580 'channel_follower_count': int
5581 },
5582 'playlist_mincount': 199,
5583 }, {
5584 'note': 'Playlists tab',
5585 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5586 'info_dict': {
5587 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5588 'title': 'lex will - Playlists',
5589 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5590 'uploader': 'lex will',
5591 'uploader_id': '@lexwill718',
5592 'uploader_url': 'https://www.youtube.com/@lexwill718',
5593 'channel': 'lex will',
5594 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5595 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5596 'tags': ['bible', 'history', 'prophesy'],
5597 'channel_follower_count': int
5598 },
5599 'playlist_mincount': 17,
5600 }, {
5601 'note': 'Community tab',
5602 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5603 'info_dict': {
5604 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5605 'title': 'lex will - Community',
5606 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5607 'channel': 'lex will',
5608 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5609 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5610 'tags': ['bible', 'history', 'prophesy'],
5611 'channel_follower_count': int,
5612 'uploader_url': 'https://www.youtube.com/@lexwill718',
5613 'uploader_id': '@lexwill718',
5614 'uploader': 'lex will',
5615 },
5616 'playlist_mincount': 18,
5617 }, {
5618 'note': 'Channels tab',
5619 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5620 'info_dict': {
5621 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5622 'title': 'lex will - Channels',
5623 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5624 'channel': 'lex will',
5625 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5626 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5627 'tags': ['bible', 'history', 'prophesy'],
5628 'channel_follower_count': int,
5629 'uploader_url': 'https://www.youtube.com/@lexwill718',
5630 'uploader_id': '@lexwill718',
5631 'uploader': 'lex will',
5632 },
5633 'playlist_mincount': 12,
5634 }, {
5635 'note': 'Search tab',
5636 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5637 'playlist_mincount': 40,
5638 'info_dict': {
5639 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5640 'title': '3Blue1Brown - Search - linear algebra',
5641 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5642 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5643 'tags': ['Mathematics'],
5644 'channel': '3Blue1Brown',
5645 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5646 'channel_follower_count': int,
5647 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5648 'uploader_id': '@3blue1brown',
5649 'uploader': '3Blue1Brown',
5650 'channel_is_verified': True,
5651 },
5652 }, {
5653 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5654 'only_matching': True,
5655 }, {
5656 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5657 'only_matching': True,
5658 }, {
5659 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5660 'only_matching': True,
5661 }, {
5662 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5663 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5664 'info_dict': {
5665 'title': '29C3: Not my department',
5666 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5667 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5668 'tags': [],
5669 'view_count': int,
5670 'modified_date': '20150605',
5671 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5672 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5673 'channel': 'Christiaan008',
5674 'availability': 'public',
5675 'uploader_id': '@ChRiStIaAn008',
5676 'uploader': 'Christiaan008',
5677 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5678 },
5679 'playlist_count': 96,
5680 }, {
5681 'note': 'Large playlist',
5682 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5683 'info_dict': {
5684 'title': 'Uploads from Cauchemar',
5685 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5686 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5687 'tags': [],
5688 'modified_date': r're:\d{8}',
5689 'channel': 'Cauchemar',
5690 'view_count': int,
5691 'description': '',
5692 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5693 'availability': 'public',
5694 'uploader_id': '@Cauchemar89',
5695 'uploader': 'Cauchemar',
5696 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5697 },
5698 'playlist_mincount': 1123,
5699 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5700 }, {
5701 'note': 'even larger playlist, 8832 videos',
5702 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5703 'only_matching': True,
5704 }, {
5705 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5706 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5707 'info_dict': {
5708 'title': 'Uploads from Interstellar Movie',
5709 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5710 'tags': [],
5711 'view_count': int,
5712 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5713 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5714 'channel': 'Interstellar Movie',
5715 'description': '',
5716 'modified_date': r're:\d{8}',
5717 'availability': 'public',
5718 'uploader_id': '@InterstellarMovie',
5719 'uploader': 'Interstellar Movie',
5720 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5721 },
5722 'playlist_mincount': 21,
5723 }, {
5724 'note': 'Playlist with "show unavailable videos" button',
5725 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5726 'info_dict': {
5727 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5728 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5729 'view_count': int,
5730 'channel': 'Phim Siêu Nhân Nhật Bản',
5731 'tags': [],
5732 'description': '',
5733 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5734 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5735 'modified_date': r're:\d{8}',
5736 'availability': 'public',
5737 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5738 'uploader_id': '@phimsieunhannhatban',
5739 'uploader': 'Phim Siêu Nhân Nhật Bản',
5740 },
5741 'playlist_mincount': 200,
5742 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5743 }, {
5744 'note': 'Playlist with unavailable videos in page 7',
5745 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5746 'info_dict': {
5747 'title': 'Uploads from BlankTV',
5748 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5749 'channel': 'BlankTV',
5750 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5751 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5752 'view_count': int,
5753 'tags': [],
5754 'modified_date': r're:\d{8}',
5755 'description': '',
5756 'availability': 'public',
5757 'uploader_id': '@blanktv',
5758 'uploader': 'BlankTV',
5759 'uploader_url': 'https://www.youtube.com/@blanktv',
5760 },
5761 'playlist_mincount': 1000,
5762 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5763 }, {
5764 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5765 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5766 'info_dict': {
5767 'title': 'Data Analysis with Dr Mike Pound',
5768 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5769 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5770 'tags': [],
5771 'view_count': int,
5772 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5773 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5774 'channel': 'Computerphile',
5775 'availability': 'public',
5776 'modified_date': '20190712',
5777 'uploader_id': '@Computerphile',
5778 'uploader': 'Computerphile',
5779 'uploader_url': 'https://www.youtube.com/@Computerphile',
5780 },
5781 'playlist_mincount': 11,
5782 }, {
5783 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5784 'only_matching': True,
5785 }, {
5786 'note': 'Playlist URL that does not actually serve a playlist',
5787 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5788 'info_dict': {
5789 'id': 'FqZTN594JQw',
5790 'ext': 'webm',
5791 'title': "Smiley's People 01 detective, Adventure Series, Action",
5792 'upload_date': '20150526',
5793 'license': 'Standard YouTube License',
5794 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5795 'categories': ['People & Blogs'],
5796 'tags': list,
5797 'view_count': int,
5798 'like_count': int,
5799 },
5800 'params': {
5801 'skip_download': True,
5802 },
5803 'skip': 'This video is not available.',
5804 'add_ie': [YoutubeIE.ie_key()],
5805 }, {
5806 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5807 'only_matching': True,
5808 }, {
5809 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5810 'only_matching': True,
5811 }, {
5812 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5813 'info_dict': {
5814 'id': 'hGkQjiJLjWQ', # This will keep changing
5815 'ext': 'mp4',
5816 'title': str,
5817 'upload_date': r're:\d{8}',
5818 'description': str,
5819 'categories': ['News & Politics'],
5820 'tags': list,
5821 'like_count': int,
5822 'release_timestamp': int,
5823 'channel': 'Sky News',
5824 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5825 'age_limit': 0,
5826 'view_count': int,
5827 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5828 'playable_in_embed': True,
5829 'release_date': r're:\d+',
5830 'availability': 'public',
5831 'live_status': 'is_live',
5832 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5833 'channel_follower_count': int,
5834 'concurrent_view_count': int,
5835 'uploader_url': 'https://www.youtube.com/@SkyNews',
5836 'uploader_id': '@SkyNews',
5837 'uploader': 'Sky News',
5838 'channel_is_verified': True,
5839 },
5840 'params': {
5841 'skip_download': True,
5842 },
5843 'expected_warnings': ['Ignoring subtitle tracks found in '],
5844 }, {
5845 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5846 'info_dict': {
5847 'id': 'a48o2S1cPoo',
5848 'ext': 'mp4',
5849 'title': 'The Young Turks - Live Main Show',
5850 'upload_date': '20150715',
5851 'license': 'Standard YouTube License',
5852 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5853 'categories': ['News & Politics'],
5854 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5855 'like_count': int,
5856 },
5857 'params': {
5858 'skip_download': True,
5859 },
5860 'only_matching': True,
5861 }, {
5862 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5863 'only_matching': True,
5864 }, {
5865 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5866 'only_matching': True,
5867 }, {
5868 'note': 'A channel that is not live. Should raise error',
5869 'url': 'https://www.youtube.com/user/numberphile/live',
5870 'only_matching': True,
5871 }, {
5872 'url': 'https://www.youtube.com/feed/trending',
5873 'only_matching': True,
5874 }, {
5875 'url': 'https://www.youtube.com/feed/library',
5876 'only_matching': True,
5877 }, {
5878 'url': 'https://www.youtube.com/feed/history',
5879 'only_matching': True,
5880 }, {
5881 'url': 'https://www.youtube.com/feed/subscriptions',
5882 'only_matching': True,
5883 }, {
5884 'url': 'https://www.youtube.com/feed/watch_later',
5885 'only_matching': True,
5886 }, {
5887 'note': 'Recommended - redirects to home page.',
5888 'url': 'https://www.youtube.com/feed/recommended',
5889 'only_matching': True,
5890 }, {
5891 'note': 'inline playlist with not always working continuations',
5892 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5893 'only_matching': True,
5894 }, {
5895 'url': 'https://www.youtube.com/course',
5896 'only_matching': True,
5897 }, {
5898 'url': 'https://www.youtube.com/zsecurity',
5899 'only_matching': True,
5900 }, {
5901 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5902 'only_matching': True,
5903 }, {
5904 'url': 'https://www.youtube.com/TheYoungTurks/live',
5905 'only_matching': True,
5906 }, {
5907 'url': 'https://www.youtube.com/hashtag/cctv9',
5908 'info_dict': {
5909 'id': 'cctv9',
5910 'title': 'cctv9 - All',
5911 'tags': [],
5912 },
5913 'playlist_mincount': 300, # not consistent but should be over 300
5914 }, {
5915 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5916 'only_matching': True,
5917 }, {
5918 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5919 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5920 'only_matching': True
5921 }, {
5922 'note': '/browse/ should redirect to /channel/',
5923 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5924 'only_matching': True
5925 }, {
5926 'note': 'VLPL, should redirect to playlist?list=PL...',
5927 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5928 'info_dict': {
5929 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5930 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5931 'title': 'NCS : All Releases 💿',
5932 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
5933 'modified_date': r're:\d{8}',
5934 'view_count': int,
5935 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5936 'tags': [],
5937 'channel': 'NoCopyrightSounds',
5938 'availability': 'public',
5939 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
5940 'uploader': 'NoCopyrightSounds',
5941 'uploader_id': '@NoCopyrightSounds',
5942 },
5943 'playlist_mincount': 166,
5944 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
5945 }, {
5946 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
5947 'note': 'Topic, should redirect to playlist?list=UU...',
5948 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5949 'info_dict': {
5950 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5951 'title': 'Uploads from Royalty Free Music - Topic',
5952 'tags': [],
5953 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5954 'channel': 'Royalty Free Music - Topic',
5955 'view_count': int,
5956 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5957 'modified_date': r're:\d{8}',
5958 'description': '',
5959 'availability': 'public',
5960 'uploader': 'Royalty Free Music - Topic',
5961 },
5962 'playlist_mincount': 101,
5963 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
5964 }, {
5965 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5966 # Treat as a general feed
5967 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5968 'info_dict': {
5969 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5970 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5971 'tags': [],
5972 },
5973 'playlist_mincount': 9,
5974 }, {
5975 'note': 'Youtube music Album',
5976 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5977 'info_dict': {
5978 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5979 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5980 'tags': [],
5981 'view_count': int,
5982 'description': '',
5983 'availability': 'unlisted',
5984 'modified_date': r're:\d{8}',
5985 },
5986 'playlist_count': 50,
5987 'expected_warnings': ['YouTube Music is not directly supported'],
5988 }, {
5989 'note': 'unlisted single video playlist',
5990 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5991 'info_dict': {
5992 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5993 'title': 'yt-dlp unlisted playlist test',
5994 'availability': 'unlisted',
5995 'tags': [],
5996 'modified_date': '20220418',
5997 'channel': 'colethedj',
5998 'view_count': int,
5999 'description': '',
6000 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6001 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6002 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6003 'uploader_id': '@colethedj1894',
6004 'uploader': 'colethedj',
6005 },
6006 'playlist': [{
6007 'info_dict': {
6008 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6009 'id': 'BaW_jenozKc',
6010 '_type': 'url',
6011 'ie_key': 'Youtube',
6012 'duration': 10,
6013 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6014 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6015 'view_count': int,
6016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6017 'channel': 'Philipp Hagemeister',
6018 'uploader_id': '@PhilippHagemeister',
6019 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6020 'uploader': 'Philipp Hagemeister',
6021 }
6022 }],
6023 'playlist_count': 1,
6024 'params': {'extract_flat': True},
6025 }, {
6026 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6027 'url': 'https://www.youtube.com/feed/recommended',
6028 'info_dict': {
6029 'id': 'recommended',
6030 'title': 'recommended',
6031 'tags': [],
6032 },
6033 'playlist_mincount': 50,
6034 'params': {
6035 'skip_download': True,
6036 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6037 },
6038 }, {
6039 'note': 'API Fallback: /videos tab, sorted by oldest first',
6040 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6041 'info_dict': {
6042 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6043 'title': 'Cody\'sLab - Videos',
6044 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6045 'channel': 'Cody\'sLab',
6046 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6047 'tags': [],
6048 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6049 'channel_follower_count': int
6050 },
6051 'playlist_mincount': 650,
6052 'params': {
6053 'skip_download': True,
6054 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6055 },
6056 'skip': 'Query for sorting no longer works',
6057 }, {
6058 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6059 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6060 'info_dict': {
6061 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6062 'title': 'Uploads from Royalty Free Music - Topic',
6063 'modified_date': r're:\d{8}',
6064 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6065 'description': '',
6066 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6067 'tags': [],
6068 'channel': 'Royalty Free Music - Topic',
6069 'view_count': int,
6070 'availability': 'public',
6071 'uploader': 'Royalty Free Music - Topic',
6072 },
6073 'playlist_mincount': 101,
6074 'params': {
6075 'skip_download': True,
6076 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6077 },
6078 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6079 }, {
6080 'note': 'non-standard redirect to regional channel',
6081 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6082 'only_matching': True
6083 }, {
6084 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6085 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6086 'info_dict': {
6087 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6088 'modified_date': '20220407',
6089 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6090 'tags': [],
6091 'availability': 'unlisted',
6092 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6093 'channel': 'pukkandan',
6094 'description': 'Test for collaborative playlist',
6095 'title': 'yt-dlp test - collaborative playlist',
6096 'view_count': int,
6097 'uploader_url': 'https://www.youtube.com/@pukkandan',
6098 'uploader_id': '@pukkandan',
6099 'uploader': 'pukkandan',
6100 },
6101 'playlist_mincount': 2
6102 }, {
6103 'note': 'translated tab name',
6104 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6105 'info_dict': {
6106 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6107 'tags': [],
6108 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6109 'description': 'test description',
6110 'title': 'cole-dlp-test-acc - 再生リスト',
6111 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6112 'channel': 'cole-dlp-test-acc',
6113 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6114 'uploader_id': '@coletdjnz',
6115 'uploader': 'cole-dlp-test-acc',
6116 },
6117 'playlist_mincount': 1,
6118 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6119 'expected_warnings': ['Preferring "ja"'],
6120 }, {
6121 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6122 'note': 'preferred lang set with playlist with translated video titles',
6123 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6124 'info_dict': {
6125 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6126 'tags': [],
6127 'view_count': int,
6128 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6129 'channel': 'cole-dlp-test-acc',
6130 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6131 'description': 'test',
6132 'title': 'dlp test playlist',
6133 'availability': 'public',
6134 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6135 'uploader_id': '@coletdjnz',
6136 'uploader': 'cole-dlp-test-acc',
6137 },
6138 'playlist_mincount': 1,
6139 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6140 'expected_warnings': ['Preferring "ja"'],
6141 }, {
6142 # shorts audio pivot for 2GtVksBMYFM.
6143 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6144 'info_dict': {
6145 'id': 'sfv_audio_pivot',
6146 'title': 'sfv_audio_pivot',
6147 'tags': [],
6148 },
6149 'playlist_mincount': 50,
6150
6151 }, {
6152 # Channel with a real live tab (not to be mistaken with streams tab)
6153 # Do not treat like it should redirect to live stream
6154 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6155 'info_dict': {
6156 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6157 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6158 'tags': [],
6159 },
6160 'playlist_mincount': 20,
6161 }, {
6162 # Tab name is not the same as tab id
6163 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6164 'info_dict': {
6165 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6166 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6167 'tags': [],
6168 },
6169 'playlist_mincount': 8,
6170 }, {
6171 # Home tab id is literally home. Not to get mistaken with featured
6172 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6173 'info_dict': {
6174 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6175 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6176 'tags': [],
6177 },
6178 'playlist_mincount': 8,
6179 }, {
6180 # Should get three playlists for videos, shorts and streams tabs
6181 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6182 'info_dict': {
6183 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6184 'title': 'Polka Ch. 尾丸ポルカ',
6185 'channel_follower_count': int,
6186 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6187 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6188 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6189 'channel': 'Polka Ch. 尾丸ポルカ',
6190 'tags': 'count:35',
6191 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6192 'uploader': 'Polka Ch. 尾丸ポルカ',
6193 'uploader_id': '@OmaruPolka',
6194 'channel_is_verified': True,
6195 },
6196 'playlist_count': 3,
6197 }, {
6198 # Shorts tab with channel with handle
6199 # TODO: fix channel description
6200 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6201 'info_dict': {
6202 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6203 'title': 'Not Just Bikes - Shorts',
6204 'tags': 'count:10',
6205 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6206 'description': 'md5:5e82545b3a041345927a92d0585df247',
6207 'channel_follower_count': int,
6208 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6209 'channel': 'Not Just Bikes',
6210 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6211 'uploader': 'Not Just Bikes',
6212 'uploader_id': '@NotJustBikes',
6213 'channel_is_verified': True,
6214 },
6215 'playlist_mincount': 10,
6216 }, {
6217 # Streams tab
6218 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6219 'info_dict': {
6220 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6221 'title': '中村悠一 - Live',
6222 'tags': 'count:7',
6223 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6224 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6225 'channel': '中村悠一',
6226 'channel_follower_count': int,
6227 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6228 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6229 'uploader_id': '@Yuichi-Nakamura',
6230 'uploader': '中村悠一',
6231 },
6232 'playlist_mincount': 60,
6233 }, {
6234 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6235 # See test_youtube_lists
6236 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6237 'only_matching': True,
6238 }, {
6239 # No uploads and no UCID given. Should fail with no uploads error
6240 # See test_youtube_lists
6241 'url': 'https://www.youtube.com/news',
6242 'only_matching': True
6243 }, {
6244 # No videos tab but has a shorts tab
6245 'url': 'https://www.youtube.com/c/TKFShorts',
6246 'info_dict': {
6247 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6248 'title': 'Shorts Break - Shorts',
6249 'tags': 'count:48',
6250 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6251 'channel': 'Shorts Break',
6252 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6253 'channel_follower_count': int,
6254 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6255 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6256 'uploader': 'Shorts Break',
6257 'uploader_id': '@ShortsBreak_Official',
6258 },
6259 'playlist_mincount': 30,
6260 }, {
6261 # Trending Now Tab. tab id is empty
6262 'url': 'https://www.youtube.com/feed/trending',
6263 'info_dict': {
6264 'id': 'trending',
6265 'title': 'trending - Now',
6266 'tags': [],
6267 },
6268 'playlist_mincount': 30,
6269 }, {
6270 # Trending Gaming Tab. tab id is empty
6271 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6272 'info_dict': {
6273 'id': 'trending',
6274 'title': 'trending - Gaming',
6275 'tags': [],
6276 },
6277 'playlist_mincount': 30,
6278 }, {
6279 # Shorts url result in shorts tab
6280 # TODO: Fix channel id extraction
6281 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6282 'info_dict': {
6283 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6284 'title': 'cole-dlp-test-acc - Shorts',
6285 'channel': 'cole-dlp-test-acc',
6286 'description': 'test description',
6287 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6288 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6289 'tags': [],
6290 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6291 'uploader_id': '@coletdjnz',
6292 'uploader': 'cole-dlp-test-acc',
6293 },
6294 'playlist': [{
6295 'info_dict': {
6296 # Channel data is not currently available for short renderers (as of 2023-03-01)
6297 '_type': 'url',
6298 'ie_key': 'Youtube',
6299 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6300 'id': 'sSM9J5YH_60',
6301 'title': 'SHORT short',
6302 'view_count': int,
6303 'thumbnails': list,
6304 }
6305 }],
6306 'params': {'extract_flat': True},
6307 }, {
6308 # Live video status should be extracted
6309 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6310 'info_dict': {
6311 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6312 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6313 'tags': []
6314 },
6315 'playlist': [{
6316 'info_dict': {
6317 '_type': 'url',
6318 'ie_key': 'Youtube',
6319 'url': 'startswith:https://www.youtube.com/watch?v=',
6320 'id': str,
6321 'title': str,
6322 'live_status': 'is_live',
6323 'channel_id': str,
6324 'channel_url': str,
6325 'concurrent_view_count': int,
6326 'channel': str,
6327 'uploader': str,
6328 'uploader_url': str,
6329 'uploader_id': str,
6330 'channel_is_verified': bool, # this will keep changing
6331 }
6332 }],
6333 'params': {'extract_flat': True, 'playlist_items': '1'},
6334 'playlist_mincount': 1
6335 }, {
6336 # Channel renderer metadata. Contains number of videos on the channel
6337 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6338 'info_dict': {
6339 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6340 'title': 'cole-dlp-test-acc - Channels',
6341 'channel': 'cole-dlp-test-acc',
6342 'description': 'test description',
6343 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6344 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6345 'tags': [],
6346 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6347 'uploader_id': '@coletdjnz',
6348 'uploader': 'cole-dlp-test-acc',
6349 },
6350 'playlist': [{
6351 'info_dict': {
6352 '_type': 'url',
6353 'ie_key': 'YoutubeTab',
6354 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6355 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6356 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6357 'title': 'PewDiePie',
6358 'channel': 'PewDiePie',
6359 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6360 'thumbnails': list,
6361 'channel_follower_count': int,
6362 'playlist_count': int,
6363 'uploader': 'PewDiePie',
6364 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6365 'uploader_id': '@PewDiePie',
6366 'channel_is_verified': True,
6367 }
6368 }],
6369 'params': {'extract_flat': True},
6370 }, {
6371 'url': 'https://www.youtube.com/@3blue1brown/about',
6372 'info_dict': {
6373 'id': '@3blue1brown',
6374 'tags': ['Mathematics'],
6375 'title': '3Blue1Brown',
6376 'channel_follower_count': int,
6377 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6378 'channel': '3Blue1Brown',
6379 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6380 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6381 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6382 'uploader_id': '@3blue1brown',
6383 'uploader': '3Blue1Brown',
6384 'channel_is_verified': True,
6385 },
6386 'playlist_count': 0,
6387 }, {
6388 # Podcasts tab, with rich entry playlistRenderers
6389 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6390 'info_dict': {
6391 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6392 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6393 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6394 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6395 'title': '99 Percent Invisible - Podcasts',
6396 'uploader': '99 Percent Invisible',
6397 'channel_follower_count': int,
6398 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6399 'tags': [],
6400 'channel': '99 Percent Invisible',
6401 'uploader_id': '@99percentinvisiblepodcast',
6402 },
6403 'playlist_count': 0,
6404 }, {
6405 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6406 'url': 'https://www.youtube.com/@AHimitsu/releases',
6407 'info_dict': {
6408 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6409 'channel': 'A Himitsu',
6410 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6411 'title': 'A Himitsu - Releases',
6412 'uploader_id': '@AHimitsu',
6413 'uploader': 'A Himitsu',
6414 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6415 'tags': 'count:12',
6416 'description': 'I make music',
6417 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6418 'channel_follower_count': int,
6419 'channel_is_verified': True,
6420 },
6421 'playlist_mincount': 10,
6422 }, {
6423 # Playlist with only shorts, shown as reel renderers
6424 # FIXME: future: YouTube currently doesn't give continuation for this,
6425 # may do in future.
6426 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6427 'info_dict': {
6428 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6429 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6430 'view_count': int,
6431 'uploader_id': '@BangyShorts',
6432 'description': '',
6433 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6434 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6435 'channel': 'Bangy Shorts',
6436 'uploader': 'Bangy Shorts',
6437 'tags': [],
6438 'availability': 'public',
6439 'modified_date': r're:\d{8}',
6440 'title': 'Uploads from Bangy Shorts',
6441 },
6442 'playlist_mincount': 100,
6443 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6444 }, {
6445 'note': 'Tags containing spaces',
6446 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6447 'playlist_count': 3,
6448 'info_dict': {
6449 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6450 'channel': 'Markiplier',
6451 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6452 'title': 'Markiplier',
6453 'channel_follower_count': int,
6454 'description': 'md5:0c010910558658824402809750dc5d97',
6455 'uploader_id': '@markiplier',
6456 'uploader_url': 'https://www.youtube.com/@markiplier',
6457 'uploader': 'Markiplier',
6458 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6459 'channel_is_verified': True,
6460 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6461 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6462 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6463 'mark fischbach'],
6464 },
6465 }]
6466
@classmethod
def suitable(cls, url):
    """Claim `url` only when YoutubeIE does not already consider it suitable."""
    # URLs accepted by YoutubeIE are left to it; everything else falls back
    # to the regular suitability check of the parent class.
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
6470
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single path segment such as `/videos`; the conditional
#          `(?(not_channel)|...)` only attempts it when the `not_channel`
#          group of _VALID_URL did not take part in the match
#   post - whatever follows (query string, fragment, further path)
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6472
6473 def _get_url_mobj(self, url):
6474 mobj = self._URL_RE.match(url).groupdict()
6475 mobj.update((k, '') for k, v in mobj.items() if v is None)
6476 return mobj
6477
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a `(tab_id, tab_name)` tuple for a tab renderer dict.

    `tab_name` is the lower-cased `title` of the renderer ('' when missing).
    `tab_id` is taken, in order of preference, from the tab segment of the
    renderer's endpoint URL (resolved against `base_url`), from its
    `tabIdentifier`, and finally from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()
    tab_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, tab_path)

    # [1:] drops the leading '/' of the matched tab segment
    tab_id = tab_url and self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6499
6500 def _has_tab(self, tabs, tab_id):
6501 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6502
6503 def _empty_playlist(self, item_id, data):
6504 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
6505
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist/watch URL into a playlist or a redirect.

    `smuggled_data` is presumably supplied by the `passthrough_smuggled_data`
    decorator (this method is also invoked below with a URL only); the only
    key read here is `is_music_url`.

    Depending on the page this returns a `url_result` pointing at another
    URL/extractor, the result of `_extract_from_tabs` / `_extract_from_playlist`,
    an empty playlist, or a `playlist_result` wrapping one entry per tab.

    Raises ExtractorError when the URL cannot be resolved (e.g. a watch URL
    without any ID, a missing tab, an unrecognised page) and UserNotLive when
    the `live` tab was requested but a different tab was selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com regardless of the host that was given
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # `not_channel` is '' when that group did not match, i.e. the URL is a channel URL
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading '/'; the requested tab id is the part after it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Channel URL without a tab: start from the /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # NOTE(review): _yes_playlist presumably applies the user's playlist/no-playlist
    # preference when both IDs are present - confirm against its definition
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # extra_tabs collects URLs of additional tabs that are extracted alongside the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still refers to the channel page here, since the assignment above did not complete
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # NOTE(review): dropping `data` presumably leaves only extra_tabs to contribute entries below - confirm
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tabs: `data` may have been replaced (or cleared) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        # Each extra tab is extracted right away by calling this method again with its URL
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: the page refers to a single video (falls back to the `v` query parameter)
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6661
6662
class YoutubePlaylistIE(InfoExtractor):
    # Accepts a bare playlist ID, or a YouTube / YouTube Kids / Invidious URL
    # carrying a `list=` query parameter, and delegates to YoutubeTabIE.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        URLs already claimed by YoutubeTabIE, and URLs that carry a non-empty
        `v` query parameter, are rejected; anything else is decided by the
        regular `_VALID_URL` matching of the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import; no function-local import is needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` into a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query parameters when there are any; a bare
        # playlist ID has none, so fall back to `list=<id>`.
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Pass the flag along so the tab extractor can read it from the smuggled data
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6775
6776
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry both a video ID and a `list=` parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6826
6827
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # `/embed/live_stream?channel=<id>` embeds, redirected to the channel's /live URL
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect the embed to `/channel/<id>/live`, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6841
6842
class YoutubeYtUserIE(InfoExtractor):
    # `ytuser:<name>` shorthand for https://www.youtube.com/user/<name>
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate the `ytuser:` shorthand into the user's channel URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
6855
6856
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # `:ytfav` keyword (several spellings accepted), mapped to the `LL` playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the `LL` playlist, extracted by YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
6874
6875
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # `:ytnotif` keyword: lists the entries of the account's notification menu
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for every usable notification found in `response`.

        `continuation_list` is a one-element list used as an output slot: it is
        reset to None first and then set to each `continuationItemRenderer`
        encountered, so the caller can inspect it once this generator is exhausted.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        if not menu_items:
            menu_items = []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn a notification renderer into a `url`-type entry.

        Returns None when the notification points neither to a video nor to a
        post on a channel.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(
            title_pattern, notification_title, 'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor argument is given
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            # NOTE(review): other url results in this file carry the ID under 'id' - confirm 'video_id' is intended
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 1
        while True:
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is read from the previous page's response (None for the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return
            page += 1

    def _real_extract(self, url):
        """Return the notifications as a lazily evaluated playlist."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6966
6967
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Keyword-based search extractor. The tests below use queries of the form
    # `ytsearch<N>:<query>` and expect N results; no method is overridden here,
    # so all behaviour comes from the two base classes.
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    # Search keyword prefix (see the `ytsearch5:...` test URLs)
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        }
    }]
6989
6990
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same as YoutubeSearchIE, but with search params that sort results by date
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
7004
7005
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Regular `/results?search_query=...` (or `/search?q=...`) pages
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search text comes from `search_query` (or `q`); the optional `sp`
        parameter is forwarded unchanged as the search params.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        # The query doubles as both playlist id and playlist title
        return self.playlist_result(results, query, query)
7075
7076
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # music.youtube.com search pages; a section is selected either through the
    # `sp` query parameter or through the URL fragment (e.g. `#songs`)
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run a YouTube Music search, optionally restricted to one section.

        The playlist id/title is the query, followed by ' - <section>' when a
        section could be determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Explicit `sp`: name the section after the matching known value,
            # or after the raw params when the value is not a known one
            section = params
            for name, known_params in self._SECTIONS.items():
                if known_params == params:
                    section = name
                    break
        else:
            # No `sp`: the text between the first and second '#' (if any) selects the section
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
7128
7129
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass overrides _FEED_NAME; it determines both the extractor
    name (youtube:<feed>) and the /feed/<feed> page that extraction is
    delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_initialize(self):
        # Reuse the login check implemented on the YouTube base extractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        # The feed page itself is handled by the tab extractor
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
7148
7149
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the `WL` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Extraction of the playlist is delegated to the tab extractor
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
7162
7163
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed.

    Matches the bare youtube.com front page as well as the ":ytrec" /
    ":ytrecommended" keywords. Unlike the base class default, login is
    not required.
    """
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # All tests only check that the URL or keyword is matched
    _TESTS = [
        {'url': matched_url, 'only_matching': True}
        for matched_url in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]
7179
7180
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed (":ytsubs" keyword family)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # All tests only check that the keyword is matched
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytsubs', ':ytsubscriptions')
    ]
7192
7193
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed (":ythis" / ":ythistory" keywords)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
7202
7203
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Redirect `/source/<video id>/shorts` URLs to the `sfv_audio_pivot` feed."""
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the `bp` value of the sfv_audio_pivot feed URL for a video id.

        The encoded id is inserted three times into a fixed byte template;
        the result is base64-encoded and then percent-quoted.
        """
        encoded_id = video_id.encode()
        template = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b'
        pb_params = template % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7226
7227
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their `v=` parameter and explain why.

    This typically happens when an unquoted URL is pasted into a shell and
    everything after the first `&` is cut off. The extractor never returns
    any media; it always raises an expected ExtractorError with a hint.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches only when the URL ends right after a single, known non-video
    # query parameter (or after an attribution_link `a=` value)
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the likely cause."""
        # The hint names the yt-dlp executable so the suggested command
        # matches the program the user is actually running
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
7275
7276
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com/clip/<id> URLs.

    The clip page is used to find the underlying video ID and the clip's
    start/end offsets; the actual extraction is delegated to YoutubeIE via
    a `url_transparent` result carrying `section_start` / `section_end`.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus the clip's time range.

        Raises:
            ExtractorError: if the video ID or the clip's start/end times
                cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found under the clip attribution panel; this is
        # None when the expected structure is not present in the page data
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # A missing or malformed loopCommand would otherwise surface as a bare
        # TypeError/KeyError/ValueError; report it as an extraction failure
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (KeyError, TypeError, ValueError) as e:
            raise ExtractorError('Unable to find clip start/end time') from e

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }
7336
7337
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follow consent.youtube.com redirect URLs to the URL in their `continue` parameter."""
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Hand the `continue` target back for extraction, or fail if it is not a usable URL."""
        # When `continue` is given more than once, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7383
7384
class YoutubeTruncatedIDIE(InfoExtractor):
    """Report watch URLs whose `v=` value is only 1-10 characters long.

    The extractor never returns any media; it always raises an expected
    ExtractorError naming the incomplete ID.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)