]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[compat, networking] Deprecate old functions (#2861)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime
6 import enum
7 import hashlib
8 import itertools
9 import json
10 import math
11 import os.path
12 import random
13 import re
14 import sys
15 import threading
16 import time
17 import traceback
18 import urllib.parse
19
20 from .common import InfoExtractor, SearchInfoExtractor
21 from .openload import PhantomJSwrapper
22 from ..compat import functools
23 from ..jsinterp import JSInterpreter
24 from ..networking.exceptions import HTTPError, network_exceptions
25 from ..utils import (
26 NO_DEFAULT,
27 ExtractorError,
28 LazyList,
29 UserNotLive,
30 bug_reports_message,
31 classproperty,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 filter_dict,
36 float_or_none,
37 format_field,
38 get_first,
39 int_or_none,
40 is_html,
41 join_nonempty,
42 js_to_json,
43 mimetype2ext,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_start,
52 smuggle_url,
53 str_or_none,
54 str_to_int,
55 strftime_or_none,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unified_timestamp,
61 unsmuggle_url,
62 update_url_query,
63 url_or_none,
64 urljoin,
65 variadic,
66 )
67
# NOTE(review): presumably the key under which the originating client name is
# recorded next to streaming data - its use is outside this view; confirm.
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# Per-client Innertube configuration, keyed by client name.
# An entry may omit 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER':
# build_innertube_clients() fills those in with defaults, forces a default 'hl',
# adds a 'priority' to every entry and derives an extra '<name>_embedscreen'
# entry for each client whose name has no '_<variant>' suffix.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key here: build_innertube_clients() supplies the default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key here: build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
241
242
243 def _split_innertube_client(client_name):
244 variant, *base = client_name.rsplit('.', 1)
245 if base:
246 return variant, base[0], variant
247 base, *variant = client_name.split('_', 1)
248 return client_name, base, variant[0] if variant else None
249
250
def short_client_name(client_name):
    """Build a compact upper-case label for a client name.

    The first element returned by `_split_innertube_client` is split on '_'
    (with 'embedscreen' first expanded to 'e_s'); the label is made of up to
    four characters of the first piece and the initial of every further
    piece, combined with `join_nonempty`.
    """
    full_name = _split_innertube_client(client_name)[0]
    main, *parts = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(part[0] for part in parts)
    return join_nonempty(main[:4], initials).upper()
254
255
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    For each configured client this fills in the default API key, host,
    'REQUIRE_JS_PLAYER' flag and 'hl', and computes a 'priority' from the
    position of its base client in the preference list:

    * clients without a variant get an extra '<client>_embedscreen' entry
      (a deep copy with clientScreen 'EMBED' and a priority lowered by 3);
    * '<base>_embedded' clients get the third-party context and lose 2;
    * any other variant loses 3.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    priority = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: embedscreen entries are added while looping
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            ytcfg.setdefault(key, value)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen


build_innertube_clients()
285
286
@enum.unique
class BadgeType(enum.Enum):
    """Badge kinds that `_extract_badges` can report."""

    # Availability of the item the badge is attached to
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
295
296
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of first path segments that have a fixed meaning.
    # NOTE(review): presumably used to keep these from being treated as
    # channel/user names - the URL patterns that use it are outside this view.
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|source|'
        r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

    # A playlist id is either a known prefix followed by at least 10 id
    # characters, or one of the bare ids RDMM / WL / LL / LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Hostname regexes (one per entry) of alternative front-ends
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?iv\.ggtyler\.dev',
        r'(?:www\.)?inv\.vern\.i2p',
        r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
        r'(?:www\.)?inv\.riverside\.rocks',
        r'(?:www\.)?invidious\.silur\.me',
        r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
        r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
        r'(?:www\.)?invidious\.slipfox\.xyz',
        r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
        r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
        r'(?:www\.)?invidious\.tiekoetter\.com',
        r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
        r'(?:www\.)?invidious\.nerdvpn\.de',
        r'(?:www\.)?invidious\.weblibre\.org',
        r'(?:www\.)?inv\.odyssey346\.dev',
        r'(?:www\.)?invidious\.dhusch\.de',
        r'(?:www\.)?iv\.melmac\.space',
        r'(?:www\.)?watch\.thekitty\.zone',
        r'(?:www\.)?invidious\.privacydev\.net',
        r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
        r'(?:www\.)?invidious\.drivet\.xyz',
        r'(?:www\.)?vid\.priv\.au',
        r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
        r'(?:www\.)?inv\.vern\.cc',
        r'(?:www\.)?invidious\.esmailelbob\.xyz',
        r'(?:www\.)?invidious\.sethforprivacy\.com',
        r'(?:www\.)?yt\.oelrichsgarcia\.de',
        r'(?:www\.)?yt\.artemislena\.eu',
        r'(?:www\.)?invidious\.flokinet\.to',
        r'(?:www\.)?invidious\.baczek\.me',
        r'(?:www\.)?y\.com\.sb',
        r'(?:www\.)?invidious\.epicsite\.xyz',
        r'(?:www\.)?invidious\.lidarshield\.cloud',
        r'(?:www\.)?yt\.funami\.tech',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.syncpundit\.io',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?watch\.whatever\.social',
        r'(?:www\.)?piped\.garudalinux\.org',
        r'(?:www\.)?piped\.rivo\.lol',
        r'(?:www\.)?piped-libre\.kavin\.rocks',
        r'(?:www\.)?yt\.jae\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.esmailelbob\.xyz',
        r'(?:www\.)?piped\.projectsegfau\.lt',
        r'(?:www\.)?piped\.privacydev\.net',
        r'(?:www\.)?piped\.palveluntarjoaja\.eu',
        r'(?:www\.)?piped\.smnz\.de',
        r'(?:www\.)?piped\.adminforge\.de',
        r'(?:www\.)?watch\.whatevertinfoil\.de',
        r'(?:www\.)?piped\.qdi\.fi',
        r'(?:www\.)?piped\.video',
        r'(?:www\.)?piped\.aeong\.one',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?piped\.chauvet\.pro',
        r'(?:www\.)?watch\.leptons\.xyz',
        r'(?:www\.)?pd\.vern\.cc',
        r'(?:www\.)?piped\.hostux\.net',
        r'(?:www\.)?piped\.lunar\.icu',
        # Hyperpipe instances from https://hyperpipe.codeberg.page/
        r'(?:www\.)?hyperpipe\.surge\.sh',
        r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
        r'(?:www\.)?listen\.whatever\.social',
        r'(?:www\.)?music\.adminforge\.de',
    )

    # extracted from account/account_menu ep
    # XXX: These are the supported YouTube UI and API languages,
    # which is slightly different from languages supported for translation in YouTube studio
    # `_preferred_lang` rejects any user-supplied code that is not in this list (case-sensitive)
    _SUPPORTED_LANG_CODES = [
        'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
        'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
        'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
        'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
        'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
        'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
    ]

    # Alert messages that `_report_alerts` drops instead of reporting as warnings
    _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

    # '@' followed by 3 to 30 word characters, dots or hyphens
    _YT_HANDLE_RE = r'@[\w.-]{3,30}'  # https://support.google.com/youtube/answer/11585688?hl=en
    # 'UC' followed by exactly 22 word characters or hyphens
    _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
462
def ucid_or_none(self, ucid):
    """Search `ucid` for a whole-string match of the channel UC-id pattern.

    Delegates to `_search_regex` with ``default=None``.
    """
    whole_ucid_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(whole_ucid_re, ucid, 'UC-id', default=None)
465
def handle_or_none(self, handle):
    """Search `handle` for a whole-string match of the @-handle pattern.

    Delegates to `_search_regex` with ``default=None``.
    """
    whole_handle_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(whole_handle_re, handle, '@-handle', default=None)
468
def handle_from_url(self, url):
    """Look for an @-handle as the first path segment of `url`.

    The youtube.com scheme/host prefix is optional, so a bare path such as
    '/@name' is matched too. Delegates to `_search_regex` with
    ``default=None``.
    """
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
472
def ucid_from_url(self, url):
    """Look for a channel UC-id as the first path segment of `url`.

    The youtube.com scheme/host prefix is optional. Delegates to
    `_search_regex` with ``default=None``.
    """
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
476
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError (expected) when the configured code is not one of
    `_SUPPORTED_LANG_CODES`; warns when a language other than 'en' is used.
    """
    configured = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    preferred_lang = configured[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
494
def _initialize_consent(self):
    """Set a 'YES' CONSENT cookie on .youtube.com unless one is not needed.

    Nothing is done when a '__Secure-3PSID' cookie exists or when the
    current CONSENT cookie already contains 'YES'. The id appended to the
    new cookie is taken from a 'PENDING+<digits>' CONSENT value when there
    is one, otherwise a random 3-digit number is used.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = cookies.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
509
def _initialize_pref(self):
    """Rewrite the PREF cookie so that 'hl' and 'tz' are pinned.

    Existing PREF fields are kept when the cookie can be parsed; 'hl' is set
    to the preferred language (default 'en') and 'tz' to 'UTC'.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Fall back to an empty PREF rather than failing initialization
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
521
def _real_initialize(self):
    """Run the initialization steps: PREF cookie, consent cookie, login check."""
    initialization_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in initialization_steps:
        step()
526
527 def _check_login_required(self):
528 if self._LOGIN_REQUIRED and not self._cookies_passed:
529 self.raise_login_required('Login details are needed to download this content', method='cookies')
530
# Matches the start of an assignment to `ytInitialData`, written either as a
# bare name or as `window["ytInitialData"]`; `extract_yt_initial_data` passes
# it to `_search_json` as the start pattern
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the start of an assignment to `ytInitialPlayerResponse`
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
533
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration of `client`.

    A copy is returned so that callers can modify it without touching
    INNERTUBE_CLIENTS. Raises KeyError for an unknown client name.
    """
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
536
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
539
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """`try_get` on `ytcfg`, falling back to the default client configuration.

    The built-in configuration of `default_client` is consulted whenever the
    value obtained from `ytcfg` is falsy (missing, None, empty, 0, ...).
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
544
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_NAME' is tried first, then the 'clientName' inside
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
549
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_VERSION' is tried first, then the 'clientVersion'
    inside 'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
554
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname to use.

    Precedence: the 'innertube_host' extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
558
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default config."""
    def api_key_of(config):
        return config['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)
561
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict to send with API requests.

    The context comes from `ytcfg` when it has one, otherwise from a copy of
    the default client configuration. Its 'client' dict is updated in place
    so that language and time zone are fixed; when the context was taken
    from `ytcfg`, that means the caller's dict is modified.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
569
570 _SAPISID = None
571
572 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
573 time_now = round(time.time())
574 if self._SAPISID is None:
575 yt_cookies = self._get_cookies('https://www.youtube.com')
576 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
577 # See: https://github.com/yt-dlp/yt-dlp/issues/393
578 sapisid_cookie = dict_get(
579 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
580 if sapisid_cookie and sapisid_cookie.value:
581 self._SAPISID = sapisid_cookie.value
582 self.write_debug('Extracted SAPISID cookie')
583 # SAPISID cookie is required if not already present
584 if not yt_cookies.get('SAPISID'):
585 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
586 self._set_cookie(
587 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
588 else:
589 self._SAPISID = False
590 if not self._SAPISID:
591 return None
592 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
593 sapisidhash = hashlib.sha1(
594 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
595 return f'SAPISIDHASH {time_now}_{sapisidhash}'
596
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the '/youtubei/v1/<ep>' endpoint and return the JSON result.

    The request body is `query` plus a 'context' (the given one, or the one
    extracted for `default_client`). `headers` are applied on top of the
    generated API headers. The 'innertube_key' extractor argument takes
    precedence over `api_key`, which takes precedence over the key of
    `default_client`.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    return self._download_json(
        f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
614
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Extract the JSON assigned to `ytInitialData` in `webpage` via `_search_json`."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
617
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None when there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
628
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg` ('ID_TOKEN') or else from `webpage`.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
639
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns None when none of the given objects provides one.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]
    return None
658
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
668
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
672
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`.

    Returns an empty dict when there is no webpage, no such call, or the
    JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
680
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit keyword arguments win over values extracted from `ytcfg`, and
    values missing from `ytcfg` fall back to the configuration of
    `default_client` where such a fallback exists. The result is passed
    through `filter_dict` before it is returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    identity = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
706
def _download_ytcfg(self, client, video_id):
    """Download the page of a web client and extract its ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a page to download; an
    empty dict is returned for any other client, and also when nothing
    could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}

    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
718
719 @staticmethod
720 def _build_api_continuation_query(continuation, ctp=None):
721 query = {
722 'continuation': continuation
723 }
724 # TODO: Inconsistency with clickTrackingParams.
725 # Currently we have a fixed ctp contained within context (from ytcfg)
726 # and a ctp in root query for continuation.
727 if ctp:
728 query['clickTracking'] = {'clickTrackingParams': ctp}
729 return query
730
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of `renderer`.

    Returns None when the renderer has no such data or the data carries no
    continuation token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    next_continuation = try_get(renderer, getters, dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))
743
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no string
    token under 'continuationCommand'.
    """
    if not isinstance(continuation_ep, dict):
        return None
    continuation = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, continuation_ep.get('clickTrackingParams'))
753
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the
    'continuationItemRenderer' entries under 'contents', 'items' or 'rows'
    are searched for a continuation endpoint or a button command.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data

    containers = ('contents', 'items', 'rows')
    endpoints = ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    return traverse_obj(
        renderer, (containers, ..., 'continuationItemRenderer', endpoints),
        get_all=False, expected_type=cls._extract_continuation_ep_data)
764
@classmethod
def _extract_alerts(cls, data):
    """Yield an ``(alert_type, message)`` pair for every usable alert in `data`.

    `data['alerts']` is expected to be a list of dicts whose values are
    alert renderer dicts. Anything of another shape is skipped, as are
    alerts without a 'type' or without text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed renderers rather than failing on `.get`,
            # in line with the guard on the outer entries
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
777
778 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
779 errors, warnings = [], []
780 for alert_type, alert_message in alerts:
781 if alert_type.lower() == 'error' and fatal:
782 errors.append([alert_type, alert_message])
783 elif alert_message not in self._IGNORED_WARNINGS:
784 warnings.append([alert_type, alert_message])
785
786 for alert_type, alert_message in (warnings + errors[:-1]):
787 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
788 if errors:
789 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
790
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to `_report_alerts`.

    Extra positional and keyword arguments are forwarded to `_report_alerts`.
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
793
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    @returns [{'type': BadgeType}]

    A badge is identified by its icon type, then by its style, and as a last
    resort by looking for a known English phrase in its label or tooltip.
    """
    by_icon_type = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    by_style = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    # Checked in this order; the first phrase found in the label wins
    by_label = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    def classify(badge):
        known_type = (
            by_icon_type.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or by_style.get(traverse_obj(badge, 'style'))
        )
        if known_type:
            return known_type

        # fallback, won't work in some languages
        label = traverse_obj(
            badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
        lowered = label.lower()
        return next(
            (label_type for phrase, label_type in by_label.items() if phrase in lowered), None)

    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    badges = []
    for renderer in renderers:
        badge_type = classify(renderer)
        if badge_type:
            badges.append({'type': badge_type})
    return badges
845
@staticmethod
def _has_badge(badges, badge_type):
    """
    Tell whether `badges` contains at least one entry whose 'type' equals `badge_type`.

    The lookup goes through `traverse_obj` with a filter function rather than a plain loop.
    @returns bool
    """
    def is_requested_type(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_requested_type)
    return bool(matching)
849
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` under any of the given paths.

    For each candidate object a non-empty 'simpleText' string wins; otherwise the 'text'
    of its 'runs' (or of the object itself when it is a list) are concatenated.
    @param path_list: traversal paths tried in order; with no paths `data` itself is used
    @param max_runs:  if truthy, only this many leading runs are joined
    @returns the text, or None when nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path containing `...` or a list/tuple key is treated as already yielding
            # a list of results; any other path yields a single object that is wrapped
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not runs:
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
871
def _get_count(self, data, *path_list):
    """
    Read a count from the text found at `path_list` in `data`.

    The text (see `_get_text`) is first given to `parse_count`; if that yields None,
    the leading run of digits/commas of the whitespace-stripped text is converted instead.
    @returns the count, or None if neither approach produced a value
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
879
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail entries found under the 'thumbnails' key

    Entries without a valid URL are dropped.
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...))
        for entry in entries:
            image_url = url_or_none(entry.get('url'))
            if image_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in image_url:
                    # Keep only the part before the query string
                    image_url = image_url.split('?')[0]
                collected.append({
                    'url': image_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected
903
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time expression in a string and convert it with datetime_from_str
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    @returns the converted value, or None if the text contains no recognised
             expression or the "<amount><unit> ago" conversion raises ValueError
    """

    # XXX: this could be moved to a general function in utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not found:
        return None

    keyword = found.group('start')
    if keyword:
        # 'today' / 'yesterday' / 'now' are passed through unchanged
        return datetime_from_str(keyword)

    amount, unit = found.group('time'), found.group('unit')
    try:
        return datetime_from_str(f'now-{amount}{unit}')
    except ValueError:
        return None
926
def _parse_time_text(self, text):
    """
    Convert a human-readable time text into a timestamp.

    A relative expression (see `extract_relative_time`) is tried first, then
    `unified_timestamp` on the whole text, then on a date-like fragment cut out of the
    lower-cased text. If everything fails and the preferred language is unset or 'en',
    a warning is reported once.
    @returns the timestamp, or None if `text` is empty or could not be parsed
    """
    if not text:
        return

    relative = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative.timetuple())
        if isinstance(relative, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
946
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on transient failures.

    Each attempt of the RetryManager loop ends in one of three ways: the response is
    returned, the failure is stored in `retry.error` and the next attempt starts, or the
    failure is handed to `_error_or_warning` and its result is returned.

    @param check_get_keys: key path(s) passed to traverse_obj; a response for which they
                           yield nothing is treated as incomplete and retried
    @param fatal:          only forwarded to `_error_or_warning`; the API call itself is
                           always made with fatal=True so that failures surface here
    The remaining arguments are forwarded to `_call_api`, `_extract_context` and
    `_extract_api_key`.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: retrying is not attempted
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                # Network error without an HTTP response: retry
                retry.error = e
                continue

            # Peek at the start of the error body; if it is not HTML, try to read the
            # error message out of it as JSON and report it as a non-fatal alert
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
998
@staticmethod
def is_music_url(url):
    """
    Tell whether `url` starts with a music.youtube.com address.

    The http(s) scheme is optional; the host must be followed by a slash.
    @returns bool
    """
    music_prefix = re.match(r'(https?://)?music\.youtube\.com/', url)
    return music_prefix is not None
1002
def _extract_video(self, renderer):
    """
    Build a 'url'-type result for a single video renderer.

    Metadata is read from the renderer itself, with the reel (shorts) player header
    nested under its navigation endpoint used as a fallback for title, channel and
    time text.
    @param renderer: dict describing one video entry
    @returns dict with '_type': 'url' that refers to YoutubeIE
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text/overlay, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = self.ucid_or_none(
        traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
            expected_type=str, get_all=False)
        or traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # For live and upcoming entries the number is stored as a concurrent view count
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (
        self._get_text(renderer, 'ownerText', 'shortBylineText')
        or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The time text is only converted when the 'approximate_date' extractor argument is set
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None, not False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1097
1098
1099 class YoutubeIE(YoutubeBaseInfoExtractor):
1100 IE_DESC = 'YouTube'
1101 _VALID_URL = r"""(?x)^
1102 (
1103 (?:https?://|//) # http(s):// or protocol-independent URL
1104 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1105 (?:www\.)?deturl\.com/www\.youtube\.com|
1106 (?:www\.)?pwnyoutube\.com|
1107 (?:www\.)?hooktube\.com|
1108 (?:www\.)?yourepeat\.com|
1109 tube\.majestyc\.net|
1110 %(invidious)s|
1111 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1112 (?:.*?\#/)? # handle anchor (#/) redirect urls
1113 (?: # the various things that can precede the ID:
1114 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1115 |(?: # or the v= param in all its forms
1116 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1117 (?:\?|\#!?) # the params delimiter ? or # or #!
1118 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1119 v=
1120 )
1121 ))
1122 |(?:
1123 youtu\.be| # just youtu.be/xxxx
1124 vid\.plus| # or vid.plus/xxxx
1125 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1126 %(invidious)s
1127 )/
1128 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1129 )
1130 )? # all until now is optional -> you can pass the naked ID
1131 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1132 (?(1).+)? # if we found the ID, everything can follow
1133 (?:\#|$)""" % {
1134 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1135 }
1136 _EMBED_REGEX = [
1137 r'''(?x)
1138 (?:
1139 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1140 data-video-url=|
1141 <embed[^>]+?src=|
1142 embedSWF\(?:\s*|
1143 <object[^>]+data=|
1144 new\s+SWFObject\(
1145 )
1146 (["\'])
1147 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1148 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1149 \1''',
1150 # https://wordpress.org/plugins/lazy-load-for-videos/
1151 r'''(?xs)
1152 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1153 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1154 ]
1155 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1156
1157 _PLAYER_INFO_RE = (
1158 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1159 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1160 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1161 )
1162 _formats = {
1163 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1164 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1165 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1166 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1167 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1168 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1169 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1170 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1171 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1172 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1173 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1174 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1175 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1176 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1177 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1178 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1179 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1180 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1181
1182
1183 # 3D videos
1184 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1185 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1186 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1187 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1188 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1189 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1190 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1191
1192 # Apple HTTP Live Streaming
1193 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1194 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1195 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1196 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1197 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1198 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1199 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1200 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1201
1202 # DASH mp4 video
1203 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1204 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1205 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1206 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1207 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1208 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1209 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1210 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1211 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1212 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1213 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1214 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1215
1216 # Dash mp4 audio
1217 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1218 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1219 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1220 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1221 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1222 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1223 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1224
1225 # Dash webm
1226 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1227 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1228 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1229 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1230 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1231 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1232 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1233 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1234 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1235 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1236 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1237 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1238 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1239 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1240 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1241 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1242 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1243 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1244 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1245 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1246 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1247 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1248
1249 # Dash webm audio
1250 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1251 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1252
1253 # Dash webm audio with opus inside
1254 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1255 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1256 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1257
1258 # RTMP (unnamed)
1259 '_rtmp': {'protocol': 'rtmp'},
1260
1261 # av01 video only formats sometimes served with "unknown" codecs
1262 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1263 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1264 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1265 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1266 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1267 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1268 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1269 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1270 }
1271 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1272
1273 _GEO_BYPASS = False
1274
1275 IE_NAME = 'youtube'
1276 _TESTS = [
1277 {
1278 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1279 'info_dict': {
1280 'id': 'BaW_jenozKc',
1281 'ext': 'mp4',
1282 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1283 'channel': 'Philipp Hagemeister',
1284 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1285 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1286 'upload_date': '20121002',
1287 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1288 'categories': ['Science & Technology'],
1289 'tags': ['youtube-dl'],
1290 'duration': 10,
1291 'view_count': int,
1292 'like_count': int,
1293 'availability': 'public',
1294 'playable_in_embed': True,
1295 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1296 'live_status': 'not_live',
1297 'age_limit': 0,
1298 'start_time': 1,
1299 'end_time': 9,
1300 'comment_count': int,
1301 'channel_follower_count': int,
1302 'uploader': 'Philipp Hagemeister',
1303 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1304 'uploader_id': '@PhilippHagemeister',
1305 'heatmap': 'count:100',
1306 }
1307 },
1308 {
1309 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1310 'note': 'Embed-only video (#1746)',
1311 'info_dict': {
1312 'id': 'yZIXLfi8CZQ',
1313 'ext': 'mp4',
1314 'upload_date': '20120608',
1315 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1316 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1317 'age_limit': 18,
1318 },
1319 'skip': 'Private video',
1320 },
1321 {
1322 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1323 'note': 'Use the first video ID in the URL',
1324 'info_dict': {
1325 'id': 'BaW_jenozKc',
1326 'ext': 'mp4',
1327 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1328 'channel': 'Philipp Hagemeister',
1329 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1330 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1331 'upload_date': '20121002',
1332 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1333 'categories': ['Science & Technology'],
1334 'tags': ['youtube-dl'],
1335 'duration': 10,
1336 'view_count': int,
1337 'like_count': int,
1338 'availability': 'public',
1339 'playable_in_embed': True,
1340 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1341 'live_status': 'not_live',
1342 'age_limit': 0,
1343 'comment_count': int,
1344 'channel_follower_count': int,
1345 'uploader': 'Philipp Hagemeister',
1346 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1347 'uploader_id': '@PhilippHagemeister',
1348 'heatmap': 'count:100',
1349 },
1350 'params': {
1351 'skip_download': True,
1352 },
1353 },
1354 {
1355 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1356 'note': '256k DASH audio (format 141) via DASH manifest',
1357 'info_dict': {
1358 'id': 'a9LDPn-MO4I',
1359 'ext': 'm4a',
1360 'upload_date': '20121002',
1361 'description': '',
1362 'title': 'UHDTV TEST 8K VIDEO.mp4'
1363 },
1364 'params': {
1365 'youtube_include_dash_manifest': True,
1366 'format': '141',
1367 },
1368 'skip': 'format 141 not served anymore',
1369 },
1370 # DASH manifest with encrypted signature
1371 {
1372 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1373 'info_dict': {
1374 'id': 'IB3lcPjvWLA',
1375 'ext': 'm4a',
1376 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1377 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1378 'duration': 244,
1379 'upload_date': '20131011',
1380 'abr': 129.495,
1381 'like_count': int,
1382 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1383 'playable_in_embed': True,
1384 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1385 'view_count': int,
1386 'track': 'The Spark',
1387 'live_status': 'not_live',
1388 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1389 'channel': 'Afrojack',
1390 'tags': 'count:19',
1391 'availability': 'public',
1392 'categories': ['Music'],
1393 'age_limit': 0,
1394 'alt_title': 'The Spark',
1395 'channel_follower_count': int,
1396 'uploader': 'Afrojack',
1397 'uploader_url': 'https://www.youtube.com/@Afrojack',
1398 'uploader_id': '@Afrojack',
1399 },
1400 'params': {
1401 'youtube_include_dash_manifest': True,
1402 'format': '141/bestaudio[ext=m4a]',
1403 },
1404 },
1405 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1406 {
1407 'note': 'Embed allowed age-gate video',
1408 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1409 'info_dict': {
1410 'id': 'HtVdAasjOgU',
1411 'ext': 'mp4',
1412 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1413 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1414 'duration': 142,
1415 'upload_date': '20140605',
1416 'age_limit': 18,
1417 'categories': ['Gaming'],
1418 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1419 'availability': 'needs_auth',
1420 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1421 'like_count': int,
1422 'channel': 'The Witcher',
1423 'live_status': 'not_live',
1424 'tags': 'count:17',
1425 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1426 'playable_in_embed': True,
1427 'view_count': int,
1428 'channel_follower_count': int,
1429 'uploader': 'The Witcher',
1430 'uploader_url': 'https://www.youtube.com/@thewitcher',
1431 'uploader_id': '@thewitcher',
1432 'comment_count': int,
1433 'channel_is_verified': True,
1434 'heatmap': 'count:100',
1435 },
1436 },
1437 {
1438 'note': 'Age-gate video with embed allowed in public site',
1439 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1440 'info_dict': {
1441 'id': 'HsUATh_Nc2U',
1442 'ext': 'mp4',
1443 'title': 'Godzilla 2 (Official Video)',
1444 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1445 'upload_date': '20200408',
1446 'age_limit': 18,
1447 'availability': 'needs_auth',
1448 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1449 'channel': 'FlyingKitty',
1450 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1451 'view_count': int,
1452 'categories': ['Entertainment'],
1453 'live_status': 'not_live',
1454 'tags': ['Flyingkitty', 'godzilla 2'],
1455 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1456 'like_count': int,
1457 'duration': 177,
1458 'playable_in_embed': True,
1459 'channel_follower_count': int,
1460 'uploader': 'FlyingKitty',
1461 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1462 'uploader_id': '@FlyingKitty900',
1463 'comment_count': int,
1464 'channel_is_verified': True,
1465 },
1466 },
1467 {
1468 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1469 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1470 'info_dict': {
1471 'id': 'Tq92D6wQ1mg',
1472 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1473 'ext': 'mp4',
1474 'upload_date': '20191228',
1475 'description': 'md5:17eccca93a786d51bc67646756894066',
1476 'age_limit': 18,
1477 'like_count': int,
1478 'availability': 'needs_auth',
1479 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1480 'view_count': int,
1481 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1482 'channel': 'Projekt Melody',
1483 'live_status': 'not_live',
1484 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1485 'playable_in_embed': True,
1486 'categories': ['Entertainment'],
1487 'duration': 106,
1488 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1489 'comment_count': int,
1490 'channel_follower_count': int,
1491 'uploader': 'Projekt Melody',
1492 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1493 'uploader_id': '@ProjektMelody',
1494 },
1495 },
1496 {
1497 'note': 'Non-Agegated non-embeddable video',
1498 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1499 'info_dict': {
1500 'id': 'MeJVWBSsPAY',
1501 'ext': 'mp4',
1502 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1503 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1504 'upload_date': '20130730',
1505 'track': 'Such mich find mich',
1506 'age_limit': 0,
1507 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1508 'like_count': int,
1509 'playable_in_embed': False,
1510 'creator': 'OOMPH!',
1511 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1512 'view_count': int,
1513 'alt_title': 'Such mich find mich',
1514 'duration': 210,
1515 'channel': 'Herr Lurik',
1516 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1517 'categories': ['Music'],
1518 'availability': 'public',
1519 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1520 'live_status': 'not_live',
1521 'artist': 'OOMPH!',
1522 'channel_follower_count': int,
1523 'uploader': 'Herr Lurik',
1524 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1525 'uploader_id': '@HerrLurik',
1526 },
1527 },
1528 {
1529 'note': 'Non-bypassable age-gated video',
1530 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1531 'only_matching': True,
1532 },
1533 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1534 # YouTube Red ad is not captured for creator
1535 {
1536 'url': '__2ABJjxzNo',
1537 'info_dict': {
1538 'id': '__2ABJjxzNo',
1539 'ext': 'mp4',
1540 'duration': 266,
1541 'upload_date': '20100430',
1542 'creator': 'deadmau5',
1543 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1544 'title': 'Deadmau5 - Some Chords (HD)',
1545 'alt_title': 'Some Chords',
1546 'availability': 'public',
1547 'tags': 'count:14',
1548 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1549 'view_count': int,
1550 'live_status': 'not_live',
1551 'channel': 'deadmau5',
1552 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1553 'like_count': int,
1554 'track': 'Some Chords',
1555 'artist': 'deadmau5',
1556 'playable_in_embed': True,
1557 'age_limit': 0,
1558 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1559 'categories': ['Music'],
1560 'album': 'Some Chords',
1561 'channel_follower_count': int,
1562 'uploader': 'deadmau5',
1563 'uploader_url': 'https://www.youtube.com/@deadmau5',
1564 'uploader_id': '@deadmau5',
1565 },
1566 'expected_warnings': [
1567 'DASH manifest missing',
1568 ]
1569 },
1570 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1571 {
1572 'url': 'lqQg6PlCWgI',
1573 'info_dict': {
1574 'id': 'lqQg6PlCWgI',
1575 'ext': 'mp4',
1576 'duration': 6085,
1577 'upload_date': '20150827',
1578 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1579 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1580 'like_count': int,
1581 'release_timestamp': 1343767800,
1582 'playable_in_embed': True,
1583 'categories': ['Sports'],
1584 'release_date': '20120731',
1585 'channel': 'Olympics',
1586 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1587 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1588 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1589 'age_limit': 0,
1590 'availability': 'public',
1591 'live_status': 'was_live',
1592 'view_count': int,
1593 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1594 'channel_follower_count': int,
1595 'uploader': 'Olympics',
1596 'uploader_url': 'https://www.youtube.com/@Olympics',
1597 'uploader_id': '@Olympics',
1598 'channel_is_verified': True,
1599 },
1600 'params': {
1601 'skip_download': 'requires avconv',
1602 }
1603 },
1604 # Non-square pixels
1605 {
1606 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1607 'info_dict': {
1608 'id': '_b-2C3KPAM0',
1609 'ext': 'mp4',
1610 'stretched_ratio': 16 / 9.,
1611 'duration': 85,
1612 'upload_date': '20110310',
1613 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1614 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1615 'playable_in_embed': True,
1616 'channel': '孫ᄋᄅ',
1617 'age_limit': 0,
1618 'tags': 'count:11',
1619 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1620 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1621 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1622 'view_count': int,
1623 'categories': ['People & Blogs'],
1624 'like_count': int,
1625 'live_status': 'not_live',
1626 'availability': 'unlisted',
1627 'comment_count': int,
1628 'channel_follower_count': int,
1629 'uploader': '孫ᄋᄅ',
1630 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1631 'uploader_id': '@AllenMeow',
1632 },
1633 },
1634 # url_encoded_fmt_stream_map is empty string
1635 {
1636 'url': 'qEJwOuvDf7I',
1637 'info_dict': {
1638 'id': 'qEJwOuvDf7I',
1639 'ext': 'webm',
1640 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1641 'description': '',
1642 'upload_date': '20150404',
1643 },
1644 'params': {
1645 'skip_download': 'requires avconv',
1646 },
1647 'skip': 'This live event has ended.',
1648 },
1649 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1650 {
1651 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1652 'info_dict': {
1653 'id': 'FIl7x6_3R5Y',
1654 'ext': 'webm',
1655 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1656 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1657 'duration': 220,
1658 'upload_date': '20150625',
1659 'formats': 'mincount:31',
1660 },
1661 'skip': 'not actual anymore',
1662 },
1663 # DASH manifest with segment_list
1664 {
1665 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1666 'md5': '8ce563a1d667b599d21064e982ab9e31',
1667 'info_dict': {
1668 'id': 'CsmdDsKjzN8',
1669 'ext': 'mp4',
1670 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1671 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1672 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1673 },
1674 'params': {
1675 'youtube_include_dash_manifest': True,
1676 'format': '135', # bestvideo
1677 },
1678 'skip': 'This live event has ended.',
1679 },
1680 {
1681 # Multifeed videos (multiple cameras); the URL can be that of any camera
1682 # TODO: fix multifeed titles
1683 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1684 'info_dict': {
1685 'id': 'zaPI8MvL8pg',
1686 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1687 'description': 'md5:563ccbc698b39298481ca3c571169519',
1688 },
1689 'playlist': [{
1690 'info_dict': {
1691 'id': 'j5yGuxZ8lLU',
1692 'ext': 'mp4',
1693 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1694 'description': 'md5:563ccbc698b39298481ca3c571169519',
1695 'duration': 10120,
1696 'channel_follower_count': int,
1697 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1698 'availability': 'public',
1699 'playable_in_embed': True,
1700 'upload_date': '20131105',
1701 'categories': ['Gaming'],
1702 'live_status': 'was_live',
1703 'tags': 'count:24',
1704 'release_timestamp': 1383701910,
1705 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1706 'comment_count': int,
1707 'age_limit': 0,
1708 'like_count': int,
1709 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1710 'channel': 'WiiLikeToPlay',
1711 'view_count': int,
1712 'release_date': '20131106',
1713 'uploader': 'WiiLikeToPlay',
1714 'uploader_id': '@WLTP',
1715 'uploader_url': 'https://www.youtube.com/@WLTP',
1716 },
1717 }, {
1718 'info_dict': {
1719 'id': 'zaPI8MvL8pg',
1720 'ext': 'mp4',
1721 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1722 'availability': 'public',
1723 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1724 'channel': 'WiiLikeToPlay',
1725 'channel_follower_count': int,
1726 'description': 'md5:563ccbc698b39298481ca3c571169519',
1727 'duration': 10108,
1728 'age_limit': 0,
1729 'like_count': int,
1730 'tags': 'count:24',
1731 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1732 'release_timestamp': 1383701915,
1733 'comment_count': int,
1734 'upload_date': '20131105',
1735 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1736 'release_date': '20131106',
1737 'playable_in_embed': True,
1738 'live_status': 'was_live',
1739 'categories': ['Gaming'],
1740 'view_count': int,
1741 'uploader': 'WiiLikeToPlay',
1742 'uploader_id': '@WLTP',
1743 'uploader_url': 'https://www.youtube.com/@WLTP',
1744 },
1745 }, {
1746 'info_dict': {
1747 'id': 'R7r3vfO7Hao',
1748 'ext': 'mp4',
1749 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1750 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1751 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1752 'like_count': int,
1753 'availability': 'public',
1754 'playable_in_embed': True,
1755 'upload_date': '20131105',
1756 'description': 'md5:563ccbc698b39298481ca3c571169519',
1757 'channel_follower_count': int,
1758 'tags': 'count:24',
1759 'release_date': '20131106',
1760 'comment_count': int,
1761 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1762 'channel': 'WiiLikeToPlay',
1763 'categories': ['Gaming'],
1764 'release_timestamp': 1383701914,
1765 'live_status': 'was_live',
1766 'age_limit': 0,
1767 'duration': 10128,
1768 'view_count': int,
1769 'uploader': 'WiiLikeToPlay',
1770 'uploader_id': '@WLTP',
1771 'uploader_url': 'https://www.youtube.com/@WLTP',
1772 },
1773 }],
1774 'params': {'skip_download': True},
1775 },
1776 {
1777 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1778 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1779 'info_dict': {
1780 'id': 'gVfLd0zydlo',
1781 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1782 },
1783 'playlist_count': 2,
1784 'skip': 'Not multifeed anymore',
1785 },
1786 {
1787 'url': 'https://vid.plus/FlRa-iH7PGw',
1788 'only_matching': True,
1789 },
1790 {
1791 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1792 'only_matching': True,
1793 },
1794 {
1795 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1796 # Also tests cut-off URL expansion in video description (see
1797 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1798 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1799 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1800 'info_dict': {
1801 'id': 'lsguqyKfVQg',
1802 'ext': 'mp4',
1803 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1804 'alt_title': 'Dark Walk',
1805 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1806 'duration': 133,
1807 'upload_date': '20151119',
1808 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1809 'track': 'Dark Walk',
1810 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1811 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1812 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1813 'categories': ['Film & Animation'],
1814 'view_count': int,
1815 'live_status': 'not_live',
1816 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1817 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1818 'tags': 'count:13',
1819 'availability': 'public',
1820 'channel': 'IronSoulElf',
1821 'playable_in_embed': True,
1822 'like_count': int,
1823 'age_limit': 0,
1824 'channel_follower_count': int
1825 },
1826 'params': {
1827 'skip_download': True,
1828 },
1829 },
1830 {
1831 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1832 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1833 'only_matching': True,
1834 },
1835 {
1836 # Video with yt:stretch=17:0
1837 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1838 'info_dict': {
1839 'id': 'Q39EVAstoRM',
1840 'ext': 'mp4',
1841 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1842 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1843 'upload_date': '20151107',
1844 },
1845 'params': {
1846 'skip_download': True,
1847 },
1848 'skip': 'This video does not exist.',
1849 },
1850 {
1851 # Video with incomplete 'yt:stretch=16:'
1852 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1853 'only_matching': True,
1854 },
1855 {
1856 # Video licensed under Creative Commons
1857 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1858 'info_dict': {
1859 'id': 'M4gD1WSo5mA',
1860 'ext': 'mp4',
1861 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1862 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1863 'duration': 721,
1864 'upload_date': '20150128',
1865 'license': 'Creative Commons Attribution license (reuse allowed)',
1866 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1867 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1868 'like_count': int,
1869 'age_limit': 0,
1870 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1871 'channel': 'The Berkman Klein Center for Internet & Society',
1872 'availability': 'public',
1873 'view_count': int,
1874 'categories': ['Education'],
1875 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1876 'live_status': 'not_live',
1877 'playable_in_embed': True,
1878 'channel_follower_count': int,
1879 'chapters': list,
1880 'uploader': 'The Berkman Klein Center for Internet & Society',
1881 'uploader_id': '@BKCHarvard',
1882 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1883 },
1884 'params': {
1885 'skip_download': True,
1886 },
1887 },
1888 {
1889 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1890 'info_dict': {
1891 'id': 'eQcmzGIKrzg',
1892 'ext': 'mp4',
1893 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1894 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1895 'duration': 4060,
1896 'upload_date': '20151120',
1897 'license': 'Creative Commons Attribution license (reuse allowed)',
1898 'playable_in_embed': True,
1899 'tags': 'count:12',
1900 'like_count': int,
1901 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1902 'age_limit': 0,
1903 'availability': 'public',
1904 'categories': ['News & Politics'],
1905 'channel': 'Bernie Sanders',
1906 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1907 'view_count': int,
1908 'live_status': 'not_live',
1909 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1910 'comment_count': int,
1911 'channel_follower_count': int,
1912 'chapters': list,
1913 'uploader': 'Bernie Sanders',
1914 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1915 'uploader_id': '@BernieSanders',
1916 'channel_is_verified': True,
1917 'heatmap': 'count:100',
1918 },
1919 'params': {
1920 'skip_download': True,
1921 },
1922 },
1923 {
1924 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1925 'only_matching': True,
1926 },
1927 {
1928 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1929 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1930 'only_matching': True,
1931 },
1932 {
1933 # Rental video preview
1934 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1935 'info_dict': {
1936 'id': 'uGpuVWrhIzE',
1937 'ext': 'mp4',
1938 'title': 'Piku - Trailer',
1939 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1940 'upload_date': '20150811',
1941 'license': 'Standard YouTube License',
1942 },
1943 'params': {
1944 'skip_download': True,
1945 },
1946 'skip': 'This video is not available.',
1947 },
1948 {
1949 # YouTube Red video with episode data
1950 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1951 'info_dict': {
1952 'id': 'iqKdEhx-dD4',
1953 'ext': 'mp4',
1954 'title': 'Isolation - Mind Field (Ep 1)',
1955 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1956 'duration': 2085,
1957 'upload_date': '20170118',
1958 'series': 'Mind Field',
1959 'season_number': 1,
1960 'episode_number': 1,
1961 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1962 'tags': 'count:12',
1963 'view_count': int,
1964 'availability': 'public',
1965 'age_limit': 0,
1966 'channel': 'Vsauce',
1967 'episode': 'Episode 1',
1968 'categories': ['Entertainment'],
1969 'season': 'Season 1',
1970 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1971 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1972 'like_count': int,
1973 'playable_in_embed': True,
1974 'live_status': 'not_live',
1975 'channel_follower_count': int,
1976 'uploader': 'Vsauce',
1977 'uploader_url': 'https://www.youtube.com/@Vsauce',
1978 'uploader_id': '@Vsauce',
1979 'comment_count': int,
1980 'channel_is_verified': True,
1981 },
1982 'params': {
1983 'skip_download': True,
1984 },
1985 'expected_warnings': [
1986 'Skipping DASH manifest',
1987 ],
1988 },
1989 {
1990 # The following content has been identified by the YouTube community
1991 # as inappropriate or offensive to some audiences.
1992 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1993 'info_dict': {
1994 'id': '6SJNVb0GnPI',
1995 'ext': 'mp4',
1996 'title': 'Race Differences in Intelligence',
1997 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1998 'duration': 965,
1999 'upload_date': '20140124',
2000 },
2001 'params': {
2002 'skip_download': True,
2003 },
2004 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2005 },
2006 {
2007 # itag 212
2008 'url': '1t24XAntNCY',
2009 'only_matching': True,
2010 },
2011 {
2012 # geo restricted to JP
2013 'url': 'sJL6WA-aGkQ',
2014 'only_matching': True,
2015 },
2016 {
2017 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2018 'only_matching': True,
2019 },
2020 {
2021 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2022 'only_matching': True,
2023 },
2024 {
2025 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2026 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2027 'only_matching': True,
2028 },
2029 {
2030 # DRM protected
2031 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2032 'only_matching': True,
2033 },
2034 {
2035 # Video with unsupported adaptive stream type formats
2036 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2037 'info_dict': {
2038 'id': 'Z4Vy8R84T1U',
2039 'ext': 'mp4',
2040 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2041 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2042 'duration': 433,
2043 'upload_date': '20130923',
2044 'formats': 'maxcount:10',
2045 },
2046 'params': {
2047 'skip_download': True,
2048 'youtube_include_dash_manifest': False,
2049 },
2050 'skip': 'not actual anymore',
2051 },
2052 {
2053 # YouTube Music auto-generated description
2054 # TODO: fix metadata extraction
2055 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2056 'info_dict': {
2057 'id': 'MgNrAu2pzNs',
2058 'ext': 'mp4',
2059 'title': 'Voyeur Girl',
2060 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2061 'upload_date': '20190312',
2062 'artist': 'Stephen',
2063 'track': 'Voyeur Girl',
2064 'album': 'it\'s too much love to know my dear',
2065 'release_date': '20190313',
2066 'release_year': 2019,
2067 'alt_title': 'Voyeur Girl',
2068 'view_count': int,
2069 'playable_in_embed': True,
2070 'like_count': int,
2071 'categories': ['Music'],
2072 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2073 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2074 'uploader': 'Stephen',
2075 'availability': 'public',
2076 'creator': 'Stephen',
2077 'duration': 169,
2078 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2079 'age_limit': 0,
2080 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2081 'tags': 'count:11',
2082 'live_status': 'not_live',
2083 'channel_follower_count': int
2084 },
2085 'params': {
2086 'skip_download': True,
2087 },
2088 },
2089 {
2090 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2091 'only_matching': True,
2092 },
2093 {
2094 # invalid -> valid video id redirection
2095 'url': 'DJztXj2GPfl',
2096 'info_dict': {
2097 'id': 'DJztXj2GPfk',
2098 'ext': 'mp4',
2099 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2100 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2101 'upload_date': '20090125',
2102 'artist': 'Panjabi MC',
2103 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2104 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2105 },
2106 'params': {
2107 'skip_download': True,
2108 },
2109 'skip': 'Video unavailable',
2110 },
2111 {
2112 # empty description results in an empty string
2113 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2114 'info_dict': {
2115 'id': 'x41yOUIvK2k',
2116 'ext': 'mp4',
2117 'title': 'IMG 3456',
2118 'description': '',
2119 'upload_date': '20170613',
2120 'view_count': int,
2121 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2122 'like_count': int,
2123 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2124 'tags': [],
2125 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2126 'availability': 'public',
2127 'age_limit': 0,
2128 'categories': ['Pets & Animals'],
2129 'duration': 7,
2130 'playable_in_embed': True,
2131 'live_status': 'not_live',
2132 'channel': 'l\'Or Vert asbl',
2133 'channel_follower_count': int,
2134 'uploader': 'l\'Or Vert asbl',
2135 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2136 'uploader_id': '@ElevageOrVert',
2137 },
2138 'params': {
2139 'skip_download': True,
2140 },
2141 },
2142 {
2143 # with '};' inside yt initial data (see [1])
2144 # see [2] for an example with '};' inside ytInitialPlayerResponse
2145 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2146 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2147 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2148 'info_dict': {
2149 'id': 'CHqg6qOn4no',
2150 'ext': 'mp4',
2151 'title': 'Part 77 Sort a list of simple types in c#',
2152 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2153 'upload_date': '20130831',
2154 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2155 'like_count': int,
2156 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2157 'live_status': 'not_live',
2158 'categories': ['Education'],
2159 'availability': 'public',
2160 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2161 'tags': 'count:12',
2162 'playable_in_embed': True,
2163 'age_limit': 0,
2164 'view_count': int,
2165 'duration': 522,
2166 'channel': 'kudvenkat',
2167 'comment_count': int,
2168 'channel_follower_count': int,
2169 'chapters': list,
2170 'uploader': 'kudvenkat',
2171 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2172 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2173 'channel_is_verified': True,
2174 'heatmap': 'count:100',
2175 },
2176 'params': {
2177 'skip_download': True,
2178 },
2179 },
2180 {
2181 # another example of '};' in ytInitialData
2182 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2183 'only_matching': True,
2184 },
2185 {
2186 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2187 'only_matching': True,
2188 },
2189 {
2190 # https://github.com/ytdl-org/youtube-dl/pull/28094
2191 'url': 'OtqTfy26tG0',
2192 'info_dict': {
2193 'id': 'OtqTfy26tG0',
2194 'ext': 'mp4',
2195 'title': 'Burn Out',
2196 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2197 'upload_date': '20141120',
2198 'artist': 'The Cinematic Orchestra',
2199 'track': 'Burn Out',
2200 'album': 'Every Day',
2201 'like_count': int,
2202 'live_status': 'not_live',
2203 'alt_title': 'Burn Out',
2204 'duration': 614,
2205 'age_limit': 0,
2206 'view_count': int,
2207 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2208 'creator': 'The Cinematic Orchestra',
2209 'channel': 'The Cinematic Orchestra',
2210 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2211 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2212 'availability': 'public',
2213 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2214 'categories': ['Music'],
2215 'playable_in_embed': True,
2216 'channel_follower_count': int,
2217 'uploader': 'The Cinematic Orchestra',
2218 'comment_count': int,
2219 },
2220 'params': {
2221 'skip_download': True,
2222 },
2223 },
2224 {
2225 # controversial video, only works with bpctr when authenticated with cookies
2226 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2227 'only_matching': True,
2228 },
2229 {
2230 # controversial video, requires bpctr/contentCheckOk
2231 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2232 'info_dict': {
2233 'id': 'SZJvDhaSDnc',
2234 'ext': 'mp4',
2235 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2236 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2237 'upload_date': '20140716',
2238 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2239 'duration': 170,
2240 'categories': ['News & Politics'],
2241 'view_count': int,
2242 'channel': 'CBS Mornings',
2243 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2244 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2245 'age_limit': 18,
2246 'availability': 'needs_auth',
2247 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2248 'like_count': int,
2249 'live_status': 'not_live',
2250 'playable_in_embed': True,
2251 'channel_follower_count': int,
2252 'uploader': 'CBS Mornings',
2253 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2254 'uploader_id': '@CBSMornings',
2255 'comment_count': int,
2256 'channel_is_verified': True,
2257 }
2258 },
2259 {
2260 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2261 'url': 'cBvYw8_A0vQ',
2262 'info_dict': {
2263 'id': 'cBvYw8_A0vQ',
2264 'ext': 'mp4',
2265 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2266 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2267 'upload_date': '20201120',
2268 'duration': 1456,
2269 'categories': ['Travel & Events'],
2270 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2271 'view_count': int,
2272 'channel': 'Walk around Japan',
2273 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2274 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2275 'age_limit': 0,
2276 'availability': 'public',
2277 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2278 'live_status': 'not_live',
2279 'playable_in_embed': True,
2280 'channel_follower_count': int,
2281 'uploader': 'Walk around Japan',
2282 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2283 'uploader_id': '@walkaroundjapan7124',
2284 },
2285 'params': {
2286 'skip_download': True,
2287 },
2288 }, {
2289 # Has multiple audio streams
2290 'url': 'WaOKSUlf4TM',
2291 'only_matching': True
2292 }, {
2293 # Requires Premium: has format 141 when requested using YTM url
2294 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2295 'only_matching': True
2296 }, {
2297 # multiple subtitles with same lang_code
2298 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2299 'only_matching': True,
2300 }, {
2301 # Force use android client fallback
2302 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2303 'info_dict': {
2304 'id': 'YOelRv7fMxY',
2305 'title': 'DIGGING A SECRET TUNNEL Part 1',
2306 'ext': '3gp',
2307 'upload_date': '20210624',
2308 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2309 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2310 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2311 'duration': 596,
2312 'categories': ['Entertainment'],
2313 'view_count': int,
2314 'channel': 'colinfurze',
2315 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2316 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2317 'age_limit': 0,
2318 'availability': 'public',
2319 'like_count': int,
2320 'live_status': 'not_live',
2321 'playable_in_embed': True,
2322 'channel_follower_count': int,
2323 'chapters': list,
2324 'uploader': 'colinfurze',
2325 'uploader_url': 'https://www.youtube.com/@colinfurze',
2326 'uploader_id': '@colinfurze',
2327 'comment_count': int,
2328 'channel_is_verified': True,
2329 'heatmap': 'count:100',
2330 },
2331 'params': {
2332 'format': '17', # 3gp format available on android
2333 'extractor_args': {'youtube': {'player_client': ['android']}},
2334 },
2335 },
2336 {
2337 # Skip download of additional client configs (remix client config in this case)
2338 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2339 'only_matching': True,
2340 'params': {
2341 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2342 },
2343 }, {
2344 # shorts
2345 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2346 'only_matching': True,
2347 }, {
2348 'note': 'Storyboards',
2349 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2350 'info_dict': {
2351 'id': '5KLPxDtMqe8',
2352 'ext': 'mhtml',
2353 'format_id': 'sb0',
2354 'title': 'Your Brain is Plastic',
2355 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2356 'upload_date': '20140324',
2357 'like_count': int,
2358 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2359 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2360 'view_count': int,
2361 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2362 'playable_in_embed': True,
2363 'tags': 'count:12',
2364 'availability': 'public',
2365 'channel': 'SciShow',
2366 'live_status': 'not_live',
2367 'duration': 248,
2368 'categories': ['Education'],
2369 'age_limit': 0,
2370 'channel_follower_count': int,
2371 'chapters': list,
2372 'uploader': 'SciShow',
2373 'uploader_url': 'https://www.youtube.com/@SciShow',
2374 'uploader_id': '@SciShow',
2375 'comment_count': int,
2376 'channel_is_verified': True,
2377 'heatmap': 'count:100',
2378 }, 'params': {'format': 'mhtml', 'skip_download': True}
2379 }, {
2380 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2381 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2382 'info_dict': {
2383 'id': '2NUZ8W2llS4',
2384 'ext': 'mp4',
2385 'title': 'The NP that test your phone performance 🙂',
2386 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2387 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2388 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2389 'duration': 21,
2390 'view_count': int,
2391 'age_limit': 0,
2392 'categories': ['Gaming'],
2393 'tags': 'count:23',
2394 'playable_in_embed': True,
2395 'live_status': 'not_live',
2396 'upload_date': '20220103',
2397 'like_count': int,
2398 'availability': 'public',
2399 'channel': 'Leon Nguyen',
2400 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2401 'comment_count': int,
2402 'channel_follower_count': int,
2403 'uploader': 'Leon Nguyen',
2404 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2405 'uploader_id': '@LeonNguyen',
2406 'heatmap': 'count:100',
2407 }
2408 }, {
2409 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2410 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2411 'info_dict': {
2412 'id': '2NUZ8W2llS4',
2413 'ext': 'mp4',
2414 'title': 'The NP that test your phone performance 🙂',
2415 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2416 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2417 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2418 'duration': 21,
2419 'view_count': int,
2420 'age_limit': 0,
2421 'categories': ['Gaming'],
2422 'tags': 'count:23',
2423 'playable_in_embed': True,
2424 'live_status': 'not_live',
2425 'upload_date': '20220102',
2426 'like_count': int,
2427 'availability': 'public',
2428 'channel': 'Leon Nguyen',
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2430 'comment_count': int,
2431 'channel_follower_count': int,
2432 'uploader': 'Leon Nguyen',
2433 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2434 'uploader_id': '@LeonNguyen',
2435 'heatmap': 'count:100',
2436 },
2437 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2438 }, {
2439 # date text is for a premiered video; ensure upload date is in UTC (published 1641172509)
2440 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2441 'info_dict': {
2442 'id': 'mzZzzBU6lrM',
2443 'ext': 'mp4',
2444 'title': 'I Met GeorgeNotFound In Real Life...',
2445 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2446 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2447 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2448 'duration': 955,
2449 'view_count': int,
2450 'age_limit': 0,
2451 'categories': ['Entertainment'],
2452 'tags': 'count:26',
2453 'playable_in_embed': True,
2454 'live_status': 'not_live',
2455 'release_timestamp': 1641172509,
2456 'release_date': '20220103',
2457 'upload_date': '20220103',
2458 'like_count': int,
2459 'availability': 'public',
2460 'channel': 'Quackity',
2461 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2462 'channel_follower_count': int,
2463 'uploader': 'Quackity',
2464 'uploader_id': '@Quackity',
2465 'uploader_url': 'https://www.youtube.com/@Quackity',
2466 'comment_count': int,
2467 'channel_is_verified': True,
2468 'heatmap': 'count:100',
2469 }
2470 },
2471 { # continuous livestream. Microformat upload date should be preferred.
2472 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2473 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2474 'info_dict': {
2475 'id': 'kgx4WGK0oNU',
2476 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2477 'ext': 'mp4',
2478 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2479 'availability': 'public',
2480 'age_limit': 0,
2481 'release_timestamp': 1637975704,
2482 'upload_date': '20210619',
2483 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2484 'live_status': 'is_live',
2485 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2486 'channel': 'Abao in Tokyo',
2487 'channel_follower_count': int,
2488 'release_date': '20211127',
2489 'tags': 'count:39',
2490 'categories': ['People & Blogs'],
2491 'like_count': int,
2492 'view_count': int,
2493 'playable_in_embed': True,
2494 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2495 'concurrent_view_count': int,
2496 'uploader': 'Abao in Tokyo',
2497 'uploader_url': 'https://www.youtube.com/@abaointokyo',
2498 'uploader_id': '@abaointokyo',
2499 },
2500 'params': {'skip_download': True}
2501 }, {
2502 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2503 'info_dict': {
2504 'id': 'tjjjtzRLHvA',
2505 'ext': 'mp4',
2506 'title': 'ハッシュタグ無し };if window.ytcsi',
2507 'upload_date': '20220323',
2508 'like_count': int,
2509 'availability': 'unlisted',
2510 'channel': 'Lesmiscore',
2511 'thumbnail': r're:^https?://.*\.jpg',
2512 'age_limit': 0,
2513 'categories': ['Music'],
2514 'view_count': int,
2515 'description': '',
2516 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2517 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2518 'live_status': 'not_live',
2519 'playable_in_embed': True,
2520 'channel_follower_count': int,
2521 'duration': 6,
2522 'tags': [],
2523 'uploader_id': '@lesmiscore',
2524 'uploader': 'Lesmiscore',
2525 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2526 }
2527 }, {
2528 # Prefer primary title+description language metadata by default
2529 # Do not prefer translated description if primary is empty
2530 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2531 'info_dict': {
2532 'id': 'el3E4MbxRqQ',
2533 'ext': 'mp4',
2534 'title': 'dlp test video 2 - primary sv no desc',
2535 'description': '',
2536 'channel': 'cole-dlp-test-acc',
2537 'tags': [],
2538 'view_count': int,
2539 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2540 'like_count': int,
2541 'playable_in_embed': True,
2542 'availability': 'unlisted',
2543 'thumbnail': r're:^https?://.*\.jpg',
2544 'age_limit': 0,
2545 'duration': 5,
2546 'live_status': 'not_live',
2547 'upload_date': '20220908',
2548 'categories': ['People & Blogs'],
2549 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2550 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2551 'uploader_id': '@coletdjnz',
2552 'uploader': 'cole-dlp-test-acc',
2553 },
2554 'params': {'skip_download': True}
2555 }, {
2556 # Extractor argument: prefer translated title+description
2557 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2558 'info_dict': {
2559 'id': 'gHKT4uU8Zng',
2560 'ext': 'mp4',
2561 'channel': 'cole-dlp-test-acc',
2562 'tags': [],
2563 'duration': 5,
2564 'live_status': 'not_live',
2565 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2566 'upload_date': '20220728',
2567 'view_count': int,
2568 'categories': ['People & Blogs'],
2569 'thumbnail': r're:^https?://.*\.jpg',
2570 'title': 'dlp test video title translated (fr)',
2571 'availability': 'public',
2572 'age_limit': 0,
2573 'description': 'dlp test video description translated (fr)',
2574 'playable_in_embed': True,
2575 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2576 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2577 'uploader_id': '@coletdjnz',
2578 'uploader': 'cole-dlp-test-acc',
2579 },
2580 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2581 'expected_warnings': [r'Preferring "fr" translated fields'],
2582 }, {
2583 'note': '6 channel audio',
2584 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2585 'only_matching': True,
2586 }, {
2587 'note': 'Multiple HLS formats with same itag',
2588 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2589 'info_dict': {
2590 'id': 'kX3nB4PpJko',
2591 'ext': 'mp4',
2592 'categories': ['Entertainment'],
2593 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2594 'live_status': 'not_live',
2595 'duration': 937,
2596 'channel_follower_count': int,
2597 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2598 'title': 'Last To Take Hand Off Jet, Keeps It!',
2599 'channel': 'MrBeast',
2600 'playable_in_embed': True,
2601 'view_count': int,
2602 'upload_date': '20221112',
2603 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2604 'age_limit': 0,
2605 'availability': 'public',
2606 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2607 'like_count': int,
2608 'tags': [],
2609 'uploader': 'MrBeast',
2610 'uploader_url': 'https://www.youtube.com/@MrBeast',
2611 'uploader_id': '@MrBeast',
2612 'comment_count': int,
2613 'channel_is_verified': True,
2614 'heatmap': 'count:100',
2615 },
2616 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2617 }, {
2618 'note': 'Audio formats with Dynamic Range Compression',
2619 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2620 'info_dict': {
2621 'id': 'Tq92D6wQ1mg',
2622 'ext': 'webm',
2623 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2624 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2625 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2626 'channel_follower_count': int,
2627 'description': 'md5:17eccca93a786d51bc67646756894066',
2628 'upload_date': '20191228',
2629 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2630 'playable_in_embed': True,
2631 'like_count': int,
2632 'categories': ['Entertainment'],
2633 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2634 'age_limit': 18,
2635 'channel': 'Projekt Melody',
2636 'view_count': int,
2637 'availability': 'needs_auth',
2638 'comment_count': int,
2639 'live_status': 'not_live',
2640 'duration': 106,
2641 'uploader': 'Projekt Melody',
2642 'uploader_id': '@ProjektMelody',
2643 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2644 },
2645 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2646 },
2647 {
2648 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2649 'info_dict': {
2650 'id': 'qVv6vCqciTM',
2651 'ext': 'mp4',
2652 'age_limit': 0,
2653 'comment_count': int,
2654 'chapters': 'count:13',
2655 'upload_date': '20221223',
2656 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2657 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2658 'like_count': int,
2659 'release_date': '20221223',
2660 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2661 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2662 'view_count': int,
2663 'playable_in_embed': True,
2664 'duration': 4438,
2665 'availability': 'public',
2666 'channel_follower_count': int,
2667 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2668 'categories': ['Entertainment'],
2669 'live_status': 'was_live',
2670 'release_timestamp': 1671793345,
2671 'channel': 'さなちゃんねる',
2672 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2673 'uploader': 'さなちゃんねる',
2674 'uploader_url': 'https://www.youtube.com/@sana_natori',
2675 'uploader_id': '@sana_natori',
2676 'channel_is_verified': True,
2677 'heatmap': 'count:100',
2678 },
2679 },
2680 {
2681 # Fallbacks when webpage and web client is unavailable
2682 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2683 'info_dict': {
2684 'id': 'wSSmNUl9Snw',
2685 'ext': 'mp4',
2686 # 'categories': ['Science & Technology'],
2687 'view_count': int,
2688 'chapters': 'count:2',
2689 'channel': 'Scott Manley',
2690 'like_count': int,
2691 'age_limit': 0,
2692 # 'availability': 'public',
2693 'channel_follower_count': int,
2694 'live_status': 'not_live',
2695 'upload_date': '20170831',
2696 'duration': 682,
2697 'tags': 'count:8',
2698 'uploader_url': 'https://www.youtube.com/@scottmanley',
2699 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2700 'uploader': 'Scott Manley',
2701 'uploader_id': '@scottmanley',
2702 'title': 'The Computer Hack That Saved Apollo 14',
2703 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2704 'thumbnail': r're:^https?://.*\.webp',
2705 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2706 'playable_in_embed': True,
2707 'comment_count': int,
2708 'channel_is_verified': True,
2709 'heatmap': 'count:100',
2710 },
2711 'params': {
2712 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
2713 },
2714 },
2715 ]
2716
# Test cases keyed on third-party page URLs (not YouTube URLs) whose expected
# result is a YouTube video; presumably exercised through `_extract_from_webpage`.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this hash is identical to the description hash below, which
        # looks like a copy-paste; it is presumably never compared because the test
        # sets skip_download - confirm and correct or drop
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2753
@classmethod
def suitable(cls, url):
    """
    Tell whether this extractor should handle `url`.

    A URL with a non-empty `list` query parameter is always rejected;
    every other URL is judged by the parent class.
    """
    # NOTE(review): the function-scope import is kept exactly as found; it is
    # presumably deliberate (keeps the method self-contained) - confirm before hoisting
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
2762
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then start with two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source
    # _player_cache: cache-id tuple -> computed value, or the exception it raised
    self._code_cache, self._player_cache = {}, {}
2767
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach a fragment source to every format flagged `is_from_start`.

    Only the format dicts with a truthy `is_from_start` are touched. Each of
    them is mutated in place: `is_live` and `fragments` are set, `protocol`
    is set when the stream is live, and the `is_from_start` key is removed.
    Nothing is returned.
    """
    lock = threading.Lock()
    start_time = time.time()
    # Rebinds the local name only; the caller's list object is left as it was
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild the format list, but only
        # once more than `delay` seconds have passed since the previous fetch
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # The lock serialises refetches between the generators that share this closure
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # Record why this attempt failed and let the retry manager decide
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    # Iterates the list built above; a later rebinding of `formats` inside
    # refetch_manifest does not change what this loop walks over
    for f in formats:
        f['is_live'] = is_live
        # The last bound argument is False while live, otherwise a copy of the format
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # Hand over the generator factory itself
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Not live: wrap the generator (called with an empty ctx) in a LazyList
            f['fragments'] = LazyList(gen({}))
        del f['is_from_start']
2815
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generator yielding one dict per fragment ('url' and 'fragment_count').

    `mpd_feed(format_id, delay)` must return a
    (manifest_url, stream_number, is_live) tuple or None. A truthy
    `manifestless_orig_fmt` is used as the format info instead of parsing the
    MPD and makes the loop stop after its first full pass. `ctx` is read for
    'start' and 'last_error'. The generator ends when the stream is no longer
    reported live or when `no_fragment_score` exceeds 30.
    """
    # Seconds between polls, and the look-back limit: 432000 s = 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    # With no live_start_time the difference is 0, so no warning is issued
    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        # NOTE(review): redundant, the value is already truthy inside this branch
        lack_early_segments = True

    # known_idx: next sequence number to emit; no_fragment_score: failure counter
    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        # Returns (should_continue, last sequence number)
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Use the short refetch delay when asked to, or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        # If mpd_feed gives nothing, keep the old URL/stream and treat the stream as not live
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        # NOTE(review): `last_seq` below is read from the enclosing generator's scope.
        # On the very first call it has not been assigned there yet, so the
        # `not fmts` return looks like it would raise NameError - confirm
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            # NOTE(review): raises StopIteration when no format has this stream number
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        # The sequence number is parsed out of the last fragment's 'sq/<n>' path segment
        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many failed attempts have accumulated
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                # Header unavailable: count the failure and go back to the MPD next time
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            # Reported head is behind what was already emitted; re-read from the MPD
            last_segment_url = None
            continue

        # Turn the last sequence number into an exclusive upper bound for range()
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Never start further back than MAX_DURATION worth of fragments
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a jump to the `continue` in the handler below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                # Nothing new in this pass
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        # Sleep out the rest of the FETCH_SPAN-second polling interval
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2926
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfg dicts, or None.

    `PLAYER_JS_URL` is looked up first, then `jsUrl` under
    `WEB_PLAYER_CONTEXT_CONFIGS`. The `webpage` argument is accepted but unused.
    """
    js_location = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if js_location:
        return urljoin('https://www.youtube.com', js_location)
    return None
2934
2935 def _download_player_url(self, video_id, fatal=False):
2936 res = self._download_webpage(
2937 'https://www.youtube.com/iframe_api',
2938 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2939 if res:
2940 player_version = self._search_regex(
2941 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2942 if player_version:
2943 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2944
2945 def _signature_cache_id(self, example_sig):
2946 """ Return a string representation of a signature """
2947 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2948
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching `player_url`.

    Raises ExtractorError when none of the patterns matches.
    """
    # The generator is lazy, so patterns after the first hit are never tried
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2958
2959 def _load_player(self, video_id, player_url, fatal=True):
2960 player_id = self._extract_player_info(player_url)
2961 if player_id not in self._code_cache:
2962 code = self._download_webpage(
2963 player_url, video_id, fatal=fatal,
2964 note='Downloading player ' + player_id,
2965 errnote='Download of %s failed' % player_url)
2966 if code:
2967 self._code_cache[player_id] = code
2968 return self._code_cache.get(player_id)
2969
2970 def _extract_signature_function(self, video_id, player_url, example_sig):
2971 player_id = self._extract_player_info(player_url)
2972
2973 # Read from filesystem cache
2974 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2975 assert os.path.basename(func_id) == func_id
2976
2977 self.write_debug(f'Extracting signature function {func_id}')
2978 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
2979
2980 if not cache_spec:
2981 code = self._load_player(video_id, player_url)
2982 if code:
2983 res = self._parse_sig_js(code)
2984 test_string = ''.join(map(chr, range(len(example_sig))))
2985 cache_spec = [ord(c) for c in res(test_string)]
2986 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
2987
2988 return lambda s: ''.join(s[i] for i in cache_spec)
2989
2990 def _print_sig_code(self, func, example_sig):
2991 if not self.get_param('youtube_print_sig_code'):
2992 return
2993
2994 def gen_sig_code(idxs):
2995 def _genslice(start, end, step):
2996 starts = '' if start == 0 else str(start)
2997 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2998 steps = '' if step == 1 else (':%d' % step)
2999 return f's[{starts}{ends}{steps}]'
3000
3001 step = None
3002 # Quelch pyflakes warnings - start will be set when step is set
3003 start = '(Never used)'
3004 for i, prev in zip(idxs[1:], idxs[:-1]):
3005 if step is not None:
3006 if i - prev == step:
3007 continue
3008 yield _genslice(start, prev, step)
3009 step = None
3010 continue
3011 if i - prev in [-1, 1]:
3012 step = i - prev
3013 start = prev
3014 continue
3015 else:
3016 yield 's[%d]' % prev
3017 if step is None:
3018 yield 's[%d]' % i
3019 else:
3020 yield _genslice(start, i, step)
3021
3022 test_string = ''.join(map(chr, range(len(example_sig))))
3023 cache_res = func(test_string)
3024 cache_spec = [ord(c) for c in cache_res]
3025 expr_code = ' + '.join(gen_sig_code(cache_spec))
3026 signature_id_tuple = '(%s)' % (
3027 ', '.join(str(len(p)) for p in example_sig.split('.')))
3028 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
3029 ' return %s\n') % (signature_id_tuple, expr_code)
3030 self.to_screen('Extracted signature function:\n' + code)
3031
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it as a Python callable.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    # Tried in order; every pattern captures the function name in the `sig` group
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        return initial_function([s])

    return run_signature
3052
3053 def _cached(self, func, *cache_id):
3054 def inner(*args, **kwargs):
3055 if cache_id not in self._player_cache:
3056 try:
3057 self._player_cache[cache_id] = func(*args, **kwargs)
3058 except ExtractorError as e:
3059 self._player_cache[cache_id] = e
3060 except Exception as e:
3061 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3062
3063 ret = self._player_cache[cache_id]
3064 if isinstance(ret, Exception):
3065 raise ret
3066 return ret
3067 return inner
3068
3069 def _decrypt_signature(self, s, video_id, player_url):
3070 """Turn the encrypted s field into a working signature"""
3071 extract_sig = self._cached(
3072 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3073 func = extract_sig(video_id, player_url, s)
3074 self._print_sig_code(func, s)
3075 return func(s)
3076
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Transform the encrypted n field `s` using the player's n function.

    The function is first run with the built-in JS interpreter. If that
    fails with a JSInterpreter.Exception, the same function body is executed
    through PhantomJS; if PhantomJS cannot be set up, the interpreter error
    is raised. Raises ExtractorError when `player_url` is None or the
    function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The interpreted function is cached per player URL
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig_func(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the interpreter failure
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3110
3111 def _extract_n_function_name(self, jscode):
3112 funcname, idx = self._search_regex(
3113 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3114 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3115 if not idx:
3116 return funcname
3117
3118 return json.loads(js_to_json(self._search_regex(
3119 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
3120 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3121
def _extract_n_function_code(self, video_id, player_url):
    """
    Return `(jsi, player_id, func_code)` for the player's n function.

    `func_code` is an (argument names, body) pair. It is taken from the
    'youtube-nsig' cache when available; otherwise it is extracted from the
    downloaded player JS, first with a strict regex and, failing that, with
    the JS interpreter, and then stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # NOTE(review): on a cache hit the interpreter is constructed from the cached
    # func_code rather than from player JS - confirm this is intended
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because it may contain `$`, which would otherwise act
    # as an end-of-string anchor and make this pattern unable to match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to the same (argument list, body) shape the interpreter returns
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3147
3148 def _extract_n_function_from_code(self, jsi, func_code):
3149 func = jsi.extract_function_from_code(*func_code)
3150
3151 def extract_nsig(s):
3152 try:
3153 ret = func([s])
3154 except JSInterpreter.Exception:
3155 raise
3156 except Exception as e:
3157 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3158
3159 if ret.startswith('enhanced_except_'):
3160 raise JSInterpreter.Exception('Signature function returned an exception')
3161 return ret
3162
3163 return extract_nsig
3164
3165 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3166 """
3167 Extract signatureTimestamp (sts)
3168 Required to tell API what sig/player version is in use.
3169 """
3170 sts = None
3171 if isinstance(ytcfg, dict):
3172 sts = int_or_none(ytcfg.get('STS'))
3173
3174 if not sts:
3175 # Attempt to extract from player
3176 if player_url is None:
3177 error_msg = 'Cannot extract signature timestamp without player_url.'
3178 if fatal:
3179 raise ExtractorError(error_msg)
3180 self.report_warning(error_msg)
3181 return
3182 code = self._load_player(video_id, player_url, fatal=fatal)
3183 if code:
3184 sts = int_or_none(self._search_regex(
3185 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3186 'JS player signature timestamp', group='sts', fatal=fatal))
3187 return sts
3188
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback and watch-time tracking URLs from the player responses.

    Each URL gets extra query parameters before it is requested. If either
    tracking URL is missing, a warning is issued and the method returns
    without requesting the remaining one.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = 0
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3229
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield URL results for YouTube videos referenced by `webpage`.

    A `<link rel="alternate">` pointing at a YouTube watch URL is yielded
    alone, after which extraction is stopped. Otherwise the parent class's
    results are yielded first, followed by lazyYT embeds and
    "YouTube Video Importer" player divs.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_divs = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_divs:
        # The video id is the last of the three captured groups
        found_id = groups[-1]
        yield cls.url_result(found_id, cls, found_id)
3253
@classmethod
def extract_id(cls, url):
    """Return the id that `get_temp_id` derives from `url`; raise ExtractorError if there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3260
def _extract_chapters_from_json(self, data, duration):
    """
    Build chapters from the chaptered player bar found in `data`.

    Start times are read in milliseconds and scaled to seconds; titles come
    from each chapter renderer's `simpleText`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3275
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro-marker lists of the engagement panels in `data`.

    Returns the chapters of the first list that produces any, or [] when
    none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3288
def _extract_heatmap_from_player_overlay(self, data):
    """Extract heatmap entries from the player overlay's multi-marker bar.

    Returns the entries (`start_time`, `end_time`, `value`) of the first heat
    marker list that produces a non-empty result, or None if there is none.
    """
    heat_marker_lists = traverse_obj(data, (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
        'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))

    for heat_markers in heat_marker_lists:
        heatmap = traverse_obj(heat_markers, (..., 'heatMarkerRenderer', {
            # Millisecond values are converted by dividing by 1000
            'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
            'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
            'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
        }))
        if heatmap:
            return heatmap
    return None
3299
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment info dict.

    @param comment_renderer  dict describing a single comment
    @param parent            ID of the parent comment; 'root' is stored when falsy
    @returns                 the comment dict, or None if the renderer has no 'commentId'
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    author_thumbnail = traverse_obj(
        comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none}))
    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': author_thumbnail,
        'parent': parent or 'root',
    }

    # The timestamp is only an estimate, calculated from the current time and the time text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    parsed_timestamp = self._parse_time_text(time_text)
    # FIXME: '_time_text' is non-standard, but we need a way of showing that the timestamp is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = parsed_timestamp

    # Use the first string found among the candidate author URL locations
    author_url_paths = ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')))
    relative_author_url = traverse_obj(
        comment_renderer, author_url_paths, expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', relative_author_url)

    # Optional fields are only set when the corresponding data is present
    owner_flag = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if owner_flag is not None:
        info['author_is_uploader'] = owner_flag

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
3348
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts of a comment section or of a reply thread.

    @param root_continuation_data  data from which the first continuation is extracted
    @param ytcfg                   passed on to the API header generation and requests
    @param video_id                used to generate a continuation and for warnings
    @param parent                  ID of the parent comment when extracting replies, else None
    @param tracker                 dict of counters and seen IDs shared across recursive calls;
                                   created here when not given

    Raises self.CommentsDisabled when a message renderer text is present and
    no top-level comments were extracted.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Records the estimated comment count in the tracker and returns the
        # continuation of the selected sort order (None if there is none)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields comments (and, recursively, their replies). A bare `yield`
        # produces None, which the consuming loop below treats as a signal
        # to stop the extraction.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_id = comment['id']
            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Limit replies both per thread and by the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=None,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set(),
            pinned_comment_ids=set()
        )

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The argument list is padded with empty strings so that four values can always be
    # unpacked; int_or_none is given sys.maxsize as its default for each of them
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    # Request page after page until no further continuation is found
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response is only used to obtain the header's sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is the stop signal yielded by extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3532
3533 @staticmethod
3534 def _generate_comment_continuation(video_id):
3535 """
3536 Generates initial comment section continuation token from given video id
3537 """
3538 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3539 return base64.b64encode(token.encode()).decode()
3540
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the item section identified
    as 'comment-item-section', cut off after the first value of the
    'max_comments' extractor argument.
    """
    def iter_comments(section_contents):
        # Locate the comment section renderer; None is passed on if there is none
        comment_section = None
        for section in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, comment_limit)
3551
3552 @staticmethod
3553 def _get_checkok_params():
3554 return {'contentCheckOk': True, 'racyCheckOk': True}
3555
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    @param sts  signature timestamp; only included when it is not None
    @returns    dict with 'playbackContext' followed by the check-ok parameters
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {'contentPlaybackContext': playback_context},
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3569
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates an age-gated video.

    This is the case when the playability status has a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age gate markers. The markers are lowercase, so the
    values are lowercased before matching; otherwise an uppercase status
    such as 'AGE_VERIFICATION_REQUIRED' would never match.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Only string values are considered, so unexpected types cannot raise on `in`
    reasons = [
        reason.lower() for reason in traverse_obj(
            player_response, ('playabilityStatus', ('status', 'reason')), expected_type=str)]
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
3581
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the player response is 'UNPLAYABLE'."""
    playability = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability == 'UNPLAYABLE'
3585
# Sent as the 'params' value of the player API query when the android client
# is used (see _extract_player_response)
_PLAYER_PARAMS = 'CgIQBg=='
3587
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for `video_id` using the given client.

    The signature timestamp is only extracted when a `player_url` is given.
    Returns the response, or None if it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    client_name = _split_innertube_client(client)[0]
    if client_name == 'android':
        # The android client additionally sends the player params
        yt_query['params'] = self._PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    return player_response or None
3609
def _get_requested_clients(self, url, smuggled_data):
    """Determine the innertube clients to use for extraction.

    Clients come from the 'player_client' extractor argument, where 'default'
    and 'all' expand to the default clients and to all clients that may be
    requested explicitly respectively. Unsupported names are skipped with a
    warning. If nothing valid was requested, the default clients are used.
    For music URLs, the existing '<client>_music' variants of the requested
    clients are appended.

    @returns  the requested clients with duplicates removed by orderedSet
    """
    requested_clients = []
    default = ['ios', 'android', 'web']
    # Clients starting with _ cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the music variants first; extending the list from a generator
        # over the same list would mutate it while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3633
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of the given clients.

    Further clients may be appended while processing when a response is
    unplayable or age-gated.

    @returns  (prs, player_url): the list of player responses and the player URL
    Raises the last ExtractorError if no player response was collected at all;
    otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() takes the clients in their given order
    # and a client appended later is the next one to be processed
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the initial player response of the webpage is used when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is reported as a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                # Save client name for introspection later
                name = short_client_name(client)
                sd = traverse_obj(pr, ('streamingData', {dict})) or {}
                sd[STREAMING_DATA_CLIENT_NAME] = name
                for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                    f[STREAMING_DATA_CLIENT_NAME] = name
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3721
3722 def _needs_live_processing(self, live_status, duration):
3723 if (live_status == 'is_live' and self.get_param('live_from_start')
3724 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3725 return live_status
3726
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generator yielding format dicts, followed by the subtitles dict as the last item.

    The formats listed directly in the streaming data are yielded first, then
    the formats of the HLS and DASH manifests of each streaming data entry
    (unless these are skipped).
    """
    CHUNK_SIZE = 10 << 20
    # itags maps an itag to the (protocol, language) pairs already seen for it;
    # stream_ids holds the (itag, audio track id, isDrc) tuples already seen
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # One fragment per CHUNK_SIZE bytes, requested through the 'range' query parameter
        # NOTE(review): the upper bound is capped at filesize rather than filesize - 1;
        # confirm whether the range end is meant to be inclusive
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be assigned a quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a plain URL, the URL and the encrypted signature are taken from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                # The format is kept, but marked as throttled
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for descriptive audio, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # NOTE(review): quality can still be None here when the format has neither
        # 'quality' nor 'audioQuality' and no 'qualityLabel' - confirm .replace() cannot be reached then
        name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split the mime type into the type itself and the optional codecs parameter
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        # The fragmented ("dashy") variant needs the filesize to build its fragments
        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Updates the manifest format `f` in place.
        # Returns False if the format is a duplicate that should be dropped, else True
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        # The protocol is appended to the format ID when all formats are requested
        # or when the itag was also seen with another protocol
        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        # Use the quality recorded for the itag; otherwise the one of the closest known height
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                # The itag of an HLS format is taken from its URL
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    # The file size is taken from the 'clen' component of the URL
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item
    yield subtitles
3976
def _extract_storyboard(self, player_responses, duration):
    """Yield a storyboard format for each level of the storyboard spec.

    The spec is a '|'-separated string whose first part is the base URL and
    whose remaining parts are '#'-separated descriptions of the levels.
    Malformed levels are skipped with a warning.

    Nothing is yielded if there is no valid base URL, or if `duration` is
    missing or zero, since the fps and the fragment durations are computed
    by dividing by it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec was reversed, so pop() removes the base URL from it
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoids a TypeError/ZeroDivisionError in the divisions below
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols * rows frames; the last one may be partially filled
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment only covers the remaining duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
4014
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the webpage (unless skipped) and the player responses.

    @returns  (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when 'webpage' is in the 'player_skip' extractor argument
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False,
            query={'bpctr': '9999999999', 'has_verified': '1'})

    # Fall back to the default ytcfg if none could be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
4029
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of a video and extract its formats and subtitles.

    Returns the tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles).
    `live_status` is one of 'post_live', 'is_live', 'is_upcoming', 'was_live',
    'not_live' or None when it cannot be determined.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        # Only report 'not_live' when at least one flag is explicitly False
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format and, as its last item, the subtitles
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    if not any(not fmt.get('has_drm') for fmt in formats):
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
4052
def _real_extract(self, url):
    """Extract metadata, formats, thumbnails and subtitles for a single video URL.

    Returns the info dict of the video, with two early exits:
      * a `url_result` pointing at the trailer when the playability status
        carries a trailer video id
      * a `playlist_result` with one entry per feed for multifeed (multi-camera)
        videos, unless `noplaylist` is set or the URL was smuggled with
        `force_singlefeed`
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason exists;
            # joining only the non-empty parts avoids `None += str`
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails that were actually advertised, taken before
    # the guessed URLs are appended below
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in `thumbnail_names` get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for throttled/potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            # Validate the URL before parsing its query string
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Pick up start/end times from the URL; the fragment takes precedence over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: The pattern is verbose (?x), so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)

    # `contents` and `vpir` are used further below even when initial data is missing
    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                # Two known label layouts; stop at the first one that yields a count
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line means several songs are listed; the album/artist/track
        # rows are then not attributed to this video
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
    info['upload_date'] = upload_date

    # Mirror music metadata into the generic fields
    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4580
4581
4582 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4583 @staticmethod
4584 def passthrough_smuggled_data(func):
4585 def _smuggle(info, smuggled_data):
4586 if info.get('_type') not in ('url', 'url_transparent'):
4587 return info
4588 if smuggled_data.get('is_music_url'):
4589 parsed_url = urllib.parse.urlparse(info['url'])
4590 if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
4591 smuggled_data.pop('is_music_url')
4592 info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
4593 if smuggled_data:
4594 info['url'] = smuggle_url(info['url'], smuggled_data)
4595 return info
4596
4597 @functools.wraps(func)
4598 def wrapper(self, url):
4599 url, smuggled_data = unsmuggle_url(url, {})
4600 if self.is_music_url(url):
4601 smuggled_data['is_music_url'] = True
4602 info_dict = func(self, url, smuggled_data)
4603 if smuggled_data:
4604 _smuggle(info_dict, smuggled_data)
4605 if info_dict.get('entries'):
4606 info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
4607 return info_dict
4608 return wrapper
4609
4610 @staticmethod
4611 def _extract_basic_item_renderer(item):
4612 # Modified from _extract_grid_item_renderer
4613 known_basic_renderers = (
4614 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4615 )
4616 for key, renderer in item.items():
4617 if not isinstance(renderer, dict):
4618 continue
4619 elif key in known_basic_renderers:
4620 return renderer
4621 elif key.startswith('grid') and key.endswith('Renderer'):
4622 return renderer
4623
def _extract_channel_renderer(self, renderer):
    """Build a 'url' result pointing at the channel described by a channel renderer.

    `renderer` must contain 'channelId'; the remaining fields are optional.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(endpoint_url)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    result = {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    }

    # Because of the handle quirk noted above, search channel renderers carry the
    # subscriber text in videoCountText. In feed/channels, subscriberCountText
    # holds the subscriber count as expected
    result['channel_follower_count'] = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so it is only trusted as the
    # video count when subscriberCountText itself parses as a count
    if self._get_count(renderer, 'subscriberCountText') is not None:
        result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    else:
        result['playlist_count'] = None

    result['description'] = self._get_text(renderer, 'descriptionSnippet')

    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    result['channel_is_verified'] = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
    return result
4661
4662 def _grid_entries(self, grid_renderer):
4663 for item in grid_renderer['items']:
4664 if not isinstance(item, dict):
4665 continue
4666 renderer = self._extract_basic_item_renderer(item)
4667 if not isinstance(renderer, dict):
4668 continue
4669 title = self._get_text(renderer, 'title')
4670
4671 # playlist
4672 playlist_id = renderer.get('playlistId')
4673 if playlist_id:
4674 yield self.url_result(
4675 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4676 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4677 video_title=title)
4678 continue
4679 # video
4680 video_id = renderer.get('videoId')
4681 if video_id:
4682 yield self._extract_video(renderer)
4683 continue
4684 # channel
4685 channel_id = renderer.get('channelId')
4686 if channel_id:
4687 yield self._extract_channel_renderer(renderer)
4688 continue
4689 # generic endpoint URL support
4690 ep_url = urljoin('https://www.youtube.com/', try_get(
4691 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4692 str))
4693 if ep_url:
4694 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4695 if ie.suitable(ep_url):
4696 yield self.url_result(
4697 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4698 break
4699
def _music_reponsive_list_entry(self, renderer):
    """Convert a music responsive list item renderer into a URL result.

    Resolution order: a single video, a playlist (optionally starting at a
    given video), then a browse page. Returns None when none of them applies.
    """
    music_base = 'https://music.youtube.com'

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'{music_base}/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'{music_base}/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'{music_base}/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'{music_base}/browse/{browse_id}', ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4720
4721 def _shelf_entries_from_content(self, shelf_renderer):
4722 content = shelf_renderer.get('content')
4723 if not isinstance(content, dict):
4724 return
4725 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4726 if renderer:
4727 # TODO: add support for nested playlists so each shelf is processed
4728 # as separate playlist
4729 # TODO: this includes only first N items
4730 yield from self._grid_entries(renderer)
4731 renderer = content.get('horizontalListRenderer')
4732 if renderer:
4733 # TODO
4734 pass
4735
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, followed by its embedded entries.

    With `skip_channels` set, shelves that link to a channel listing are
    dropped entirely (nothing is yielded for them).
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4751
4752 def _playlist_entries(self, video_list_renderer):
4753 for content in video_list_renderer['contents']:
4754 if not isinstance(content, dict):
4755 continue
4756 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4757 if not isinstance(renderer, dict):
4758 continue
4759 video_id = renderer.get('videoId')
4760 if not video_id:
4761 continue
4762 yield self._extract_video(renderer)
4763
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one result for a rich grid item: its video, else its playlist."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}

    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    else:
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=self._get_text(renderer, 'title'))
4779
def _video_entry(self, video_renderer):
    """Return the entry built by self._extract_video, or None if the renderer has no 'videoId'."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
4784
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the page a hashtag tile links to, or None if it has no usable URL.

    @param hashtag_tile_renderer: hashtag tile renderer dict
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4791
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a post.

    In order: the attached video, the attached playlist, then every URL in
    the post text that YoutubeIE accepts (except the attached video itself).

    @param post_thread_renderer: renderer holding the post under ['post']['backstagePostRenderer']
    """
    post = try_get(post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        linked_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not linked_url or not YoutubeIE.suitable(linked_url):
            continue
        linked_video_id = YoutubeIE._match_id(linked_url)
        # The attached video has already been yielded above
        if attached_video_id == linked_video_id:
            continue
        yield self.url_result(linked_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4827
4828 def _post_thread_continuation_entries(self, post_thread_continuation):
4829 contents = post_thread_continuation.get('contents')
4830 if not isinstance(contents, list):
4831 return
4832 for content in contents:
4833 renderer = content.get('backstagePostThreadRenderer')
4834 if isinstance(renderer, dict):
4835 yield from self._post_thread_entries(renderer)
4836 continue
4837 renderer = content.get('videoRenderer')
4838 if isinstance(renderer, dict):
4839 yield self._video_entry(renderer)
4840
4841 r''' # unused
4842 def _rich_grid_entries(self, contents):
4843 for content in contents:
4844 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4845 if video_renderer:
4846 entry = self._video_entry(video_renderer)
4847 if entry:
4848 yield entry
4849 '''
4850
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url_result for every link found in the text cells of a report-history table.

    @param renderer: table renderer dict holding 'rows'
    """
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4857
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in parent_renderer['contents'].

    The continuation for the next page is reported through continuation_list,
    which acts as an output parameter: it is reset to [None] on entry and its
    single slot is overwritten in place with whatever
    self._extract_continuation returns while the generator is consumed.

    @param parent_renderer: renderer whose 'contents' list is walked
    @param continuation_list: single-item list that receives the continuation
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # Section-like wrapper whose own 'contents' holds the actual item renderers
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not wrapped in a section: only rich items and the report history table are handled
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable that returns an iterable of entries for it
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                # Recurses into this method, sharing the same continuation_list
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            # Only the first known renderer key of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Handlers may produce None (e.g. no video ID); drop those
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to a continuation attached to the section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to one attached to the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4911
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The first page comes from tab['content'] ('sectionListRenderer' or
    'richGridRenderer'); further pages are requested with
    self._extract_response until there is no continuation, a continuation
    token repeats, a request returns nothing, or a response contains no
    known renderer.

    @param tab: tab renderer dict
    @param item_id: ID used to label the page requests
    @param ytcfg: passed on to header generation and the page requests
    @param account_syncid: passed on to self.generate_api_headers
    @param visitor_data: initial visitor data; replaced by the value found in
                         each response when there is one
    """
    continuation_list = [None]
    # The lambda looks up `continuation_list` when it is called, so rebinding
    # the name further down also changes the list that _extract_entries fills
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Maps a key of the first continuation item to (handler, parent_key).
        # When parent_key is set, the continuation items are wrapped as
        # {parent_key: items} before being handed to the handler
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # Sample used to decide which handler applies: the first item, or
        # continuation_items itself when it cannot be indexed with 0
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list so a continuation left over from the previous page is not reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this response: stop paging
        if not video_items_renderer:
            break
4979
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the first tab renderer flagged as 'selected'.

    @param tabs: iterable of tab renderer dicts
    @param fatal: raise ExtractorError when no tab is selected; when false,
                  None is returned instead
    """
    selected = next((tab for tab in tabs if tab.get('selected')), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4987
@staticmethod
def _extract_tab_renderers(response):
    """Collect the 'tabRenderer' / 'expandableTabRenderer' dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
4992
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab.

    The tab's 'title' and 'expandedText' (when present) are appended to the
    playlist title, and the entries come from self._entries.

    @param item_id: ID used when the metadata provides none
    @param ytcfg: passed on to entry extraction
    @param data: response the metadata is read from
    @param tabs: tab renderers; one of them must be selected
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
5006
def _extract_metadata_from_tabs(self, item_id, data):
    """Build the playlist-level info dict (title, channel, thumbnails, counts, ...) from a response.

    @param item_id: fallback ID; replaced by the channel ID when the response
                    carries channel metadata with one
    @param data: response
    @returns dict of metadata fields; always contains 'id' and 'title'
    """
    info = {'id': item_id}

    # Channel pages carry channelMetadataRenderer, playlists playlistMetadataRenderer
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    # Banners get a lower preference than the avatar thumbnails above
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text, then to the ID
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True
    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # View count: playlist stats first, then the playlist header, then the channel "about" metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel ID from channel metadata: take the owner from the playlist header / sidebar instead
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # Owner text of the form "by NAME and N others" is reduced to NAME
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
        })

    # Derived last, so that the values set in either branch above are used
    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
5128
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a playlist panel, requesting further pages from the 'next' endpoint.

    Each page is requested relative to the last video of the current page.
    Extraction ends when a page has no videos, contains nothing after the
    last video already yielded, or the response carries no playlist.

    @param playlist: playlist panel dict with a 'contents' list
    @param playlist_id: ID sent with every page request
    @param data: initial response, used for account sync ID and visitor data
    @param ytcfg: passed on to header generation and the page requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A page response may come back without the playlist panel
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page;
        # when it is not on this page (or this is the first page), start at 0
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer, so the
        # endpoint may be missing; the query below then uses its fallbacks
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
5159
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist panel.

    When the panel links to a playlist page other than `url` (and the
    playlist is not one of the known unviewable kinds), that page is returned
    as a url_result for YoutubeTabIE. Otherwise the panel itself is extracted
    inline.

    @param item_id: fallback playlist ID
    @param url: URL being extracted
    @param data: response, used for the fallback title
    @param playlist: playlist panel dict
    @param ytcfg: passed on to inline extraction
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_page_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, playlist_page_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if should_delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=playlist_title)

    inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
    return self.playlist_result(
        inline_entries, playlist_id=playlist_id, playlist_title=playlist_title)
5182
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Privacy is read from three places: the badges of the sidebar renderer,
    the 'privacy' field of the playlist header, and the selected entry of the
    privacy dropdown. A matching badge always counts, whether or not the
    header reports a privacy value.
    @param data: response
    @returns 'public', or the result of self._availability(...)
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def reported_privacy_is(header_value, icon_value):
        """Compare against the header privacy, else the dropdown icon; None when neither is present"""
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return None

    # NB: the badge check must stay outside the header/dropdown fallback chain.
    # Written as `badge or x == 'PRIVATE' if x is not None else ...`, Python
    # parses `(badge or x == 'PRIVATE') if x is not None else ...`, so the
    # badge was ignored whenever the header had no privacy value
    is_private = (
        self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
        or reported_privacy_is('PRIVATE', 'PRIVACY_PRIVATE'))

    unlisted_reported = reported_privacy_is('UNLISTED', 'PRIVACY_UNLISTED')
    if unlisted_reported is None:
        # Neither header nor dropdown present: fall back to the microformat flag (may be None too)
        unlisted_reported = microformats_is_unlisted
    is_unlisted = self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or unlisted_reported

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
5223
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty `info_renderer` found among the playlist sidebar items, or None.

    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the renderer must have
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
5232
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header. The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'},
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
5252
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the values of the YoutubeTabIE 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5256
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and extract its initial data, with retries driven by self.RetryManager.

    @param url: page URL
    @param item_id: ID used for reporting
    @param fatal: passed to the retry manager, the initial-data extraction
                  and self._error_or_warning
    @returns (webpage, data); either may be None if the download failed
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are handed to the retry manager, except HTTP 403/429
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            # Anything else is reported right away and ends the loop
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
5284
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not
    authenticated. Otherwise raises ExtractorError, unless `fatal` is false or
    'authcheck' is among the YoutubeTabIE 'skip' extractor arguments, in which
    case only a warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
5296
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Get the initial data for `url`, from the webpage if possible and from the API otherwise.

    The webpage is tried first unless self.skip_webpage is set. The API
    (self._extract_tab_endpoint) is used when that produced no data.

    @param ytcfg: used as-is when given, otherwise extracted from the webpage
    @param fatal: applies to the home-page redirect check, the auth check and the API request
    @param webpage_fatal: `fatal` for the webpage download only
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5315
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the 'navigation/resolve_url' API and fetch the page it points to.

    A resolved 'browseEndpoint' is fetched from 'browse', a 'watchEndpoint'
    from 'next' (checked in that order). When neither is present,
    ExtractorError is raised if `fatal`, otherwise a warning is reported and
    None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5333
# Default for the 'params' field of the search request; used by
# _search_results() when the caller does not pass `params`
_SEARCH_PARAMS = None
5335
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search results for `query`, requesting further pages until there is no continuation.

    @param query: search text
    @param params: value for the 'params' field of the request; defaults to
                   self._SEARCH_PARAMS, and is left out when empty
    @param default_client: client used for ytcfg, headers and the requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Places where the result sections may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Filled in place by _extract_entries with the continuation of each page
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        sections = traverse_obj(response, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(sections))}, continuation_list)
        if not continuation_list[0]:
            return
5368
5369
5370 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5371 IE_DESC = 'YouTube Tabs'
5372 _VALID_URL = r'''(?x:
5373 https?://
5374 (?!consent\.)(?:\w+\.)?
5375 (?:
5376 youtube(?:kids)?\.com|
5377 %(invidious)s
5378 )/
5379 (?:
5380 (?P<channel_type>channel|c|user|browse)/|
5381 (?P<not_channel>
5382 feed/|hashtag/|
5383 (?:playlist|watch)\?.*?\blist=
5384 )|
5385 (?!(?:%(reserved_names)s)\b) # Direct URLs
5386 )
5387 (?P<id>[^/?\#&]+)
5388 )''' % {
5389 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5390 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5391 }
5392 IE_NAME = 'youtube:tab'
5393
5394 _TESTS = [{
5395 'note': 'playlists, multipage',
5396 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5397 'playlist_mincount': 94,
5398 'info_dict': {
5399 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5400 'title': 'Igor Kleiner - Playlists',
5401 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5402 'uploader': 'Igor Kleiner',
5403 'uploader_id': '@IgorDataScience',
5404 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5405 'channel': 'Igor Kleiner',
5406 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5407 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5408 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5409 'channel_follower_count': int
5410 },
5411 }, {
5412 'note': 'playlists, multipage, different order',
5413 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5414 'playlist_mincount': 94,
5415 'info_dict': {
5416 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5417 'title': 'Igor Kleiner - Playlists',
5418 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5419 'uploader': 'Igor Kleiner',
5420 'uploader_id': '@IgorDataScience',
5421 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5422 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5423 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5424 'channel': 'Igor Kleiner',
5425 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5426 'channel_follower_count': int
5427 },
5428 }, {
5429 'note': 'playlists, series',
5430 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5431 'playlist_mincount': 5,
5432 'info_dict': {
5433 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5434 'title': '3Blue1Brown - Playlists',
5435 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5436 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5437 'channel': '3Blue1Brown',
5438 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5439 'uploader_id': '@3blue1brown',
5440 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5441 'uploader': '3Blue1Brown',
5442 'tags': ['Mathematics'],
5443 'channel_follower_count': int,
5444 'channel_is_verified': True,
5445 },
5446 }, {
5447 'note': 'playlists, singlepage',
5448 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5449 'playlist_mincount': 4,
5450 'info_dict': {
5451 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5452 'title': 'ThirstForScience - Playlists',
5453 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5454 'uploader': 'ThirstForScience',
5455 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5456 'uploader_id': '@ThirstForScience',
5457 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5458 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5459 'tags': 'count:13',
5460 'channel': 'ThirstForScience',
5461 'channel_follower_count': int
5462 }
5463 }, {
5464 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5465 'only_matching': True,
5466 }, {
5467 'note': 'basic, single video playlist',
5468 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5469 'info_dict': {
5470 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5471 'title': 'youtube-dl public playlist',
5472 'description': '',
5473 'tags': [],
5474 'view_count': int,
5475 'modified_date': '20201130',
5476 'channel': 'Sergey M.',
5477 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5478 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5479 'availability': 'public',
5480 'uploader': 'Sergey M.',
5481 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5482 'uploader_id': '@sergeym.6173',
5483 },
5484 'playlist_count': 1,
5485 }, {
5486 'note': 'empty playlist',
5487 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5488 'info_dict': {
5489 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5490 'title': 'youtube-dl empty playlist',
5491 'tags': [],
5492 'channel': 'Sergey M.',
5493 'description': '',
5494 'modified_date': '20160902',
5495 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5496 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5497 'availability': 'public',
5498 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5499 'uploader_id': '@sergeym.6173',
5500 'uploader': 'Sergey M.',
5501 },
5502 'playlist_count': 0,
5503 }, {
5504 'note': 'Home tab',
5505 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5506 'info_dict': {
5507 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5508 'title': 'lex will - Home',
5509 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5510 'uploader': 'lex will',
5511 'uploader_id': '@lexwill718',
5512 'channel': 'lex will',
5513 'tags': ['bible', 'history', 'prophesy'],
5514 'uploader_url': 'https://www.youtube.com/@lexwill718',
5515 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5516 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5517 'channel_follower_count': int
5518 },
5519 'playlist_mincount': 2,
5520 }, {
5521 'note': 'Videos tab',
5522 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5523 'info_dict': {
5524 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5525 'title': 'lex will - Videos',
5526 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5527 'uploader': 'lex will',
5528 'uploader_id': '@lexwill718',
5529 'tags': ['bible', 'history', 'prophesy'],
5530 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5531 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5532 'uploader_url': 'https://www.youtube.com/@lexwill718',
5533 'channel': 'lex will',
5534 'channel_follower_count': int
5535 },
5536 'playlist_mincount': 975,
5537 }, {
5538 'note': 'Videos tab, sorted by popular',
5539 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5540 'info_dict': {
5541 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5542 'title': 'lex will - Videos',
5543 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5544 'uploader': 'lex will',
5545 'uploader_id': '@lexwill718',
5546 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5547 'uploader_url': 'https://www.youtube.com/@lexwill718',
5548 'channel': 'lex will',
5549 'tags': ['bible', 'history', 'prophesy'],
5550 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5551 'channel_follower_count': int
5552 },
5553 'playlist_mincount': 199,
5554 }, {
5555 'note': 'Playlists tab',
5556 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5557 'info_dict': {
5558 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5559 'title': 'lex will - Playlists',
5560 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5561 'uploader': 'lex will',
5562 'uploader_id': '@lexwill718',
5563 'uploader_url': 'https://www.youtube.com/@lexwill718',
5564 'channel': 'lex will',
5565 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5566 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5567 'tags': ['bible', 'history', 'prophesy'],
5568 'channel_follower_count': int
5569 },
5570 'playlist_mincount': 17,
5571 }, {
5572 'note': 'Community tab',
5573 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5574 'info_dict': {
5575 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5576 'title': 'lex will - Community',
5577 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5578 'channel': 'lex will',
5579 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5580 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5581 'tags': ['bible', 'history', 'prophesy'],
5582 'channel_follower_count': int,
5583 'uploader_url': 'https://www.youtube.com/@lexwill718',
5584 'uploader_id': '@lexwill718',
5585 'uploader': 'lex will',
5586 },
5587 'playlist_mincount': 18,
5588 }, {
5589 'note': 'Channels tab',
5590 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5591 'info_dict': {
5592 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5593 'title': 'lex will - Channels',
5594 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5595 'channel': 'lex will',
5596 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5597 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5598 'tags': ['bible', 'history', 'prophesy'],
5599 'channel_follower_count': int,
5600 'uploader_url': 'https://www.youtube.com/@lexwill718',
5601 'uploader_id': '@lexwill718',
5602 'uploader': 'lex will',
5603 },
5604 'playlist_mincount': 12,
5605 }, {
5606 'note': 'Search tab',
5607 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5608 'playlist_mincount': 40,
5609 'info_dict': {
5610 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5611 'title': '3Blue1Brown - Search - linear algebra',
5612 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5613 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5614 'tags': ['Mathematics'],
5615 'channel': '3Blue1Brown',
5616 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5617 'channel_follower_count': int,
5618 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5619 'uploader_id': '@3blue1brown',
5620 'uploader': '3Blue1Brown',
5621 'channel_is_verified': True,
5622 },
5623 }, {
5624 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5625 'only_matching': True,
5626 }, {
5627 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5628 'only_matching': True,
5629 }, {
5630 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5631 'only_matching': True,
5632 }, {
5633 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5634 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5635 'info_dict': {
5636 'title': '29C3: Not my department',
5637 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5638 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5639 'tags': [],
5640 'view_count': int,
5641 'modified_date': '20150605',
5642 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5643 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5644 'channel': 'Christiaan008',
5645 'availability': 'public',
5646 'uploader_id': '@ChRiStIaAn008',
5647 'uploader': 'Christiaan008',
5648 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5649 },
5650 'playlist_count': 96,
5651 }, {
5652 'note': 'Large playlist',
5653 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5654 'info_dict': {
5655 'title': 'Uploads from Cauchemar',
5656 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5657 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5658 'tags': [],
5659 'modified_date': r're:\d{8}',
5660 'channel': 'Cauchemar',
5661 'view_count': int,
5662 'description': '',
5663 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5664 'availability': 'public',
5665 'uploader_id': '@Cauchemar89',
5666 'uploader': 'Cauchemar',
5667 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5668 },
5669 'playlist_mincount': 1123,
5670 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5671 }, {
5672 'note': 'even larger playlist, 8832 videos',
5673 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5674 'only_matching': True,
5675 }, {
5676 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5677 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5678 'info_dict': {
5679 'title': 'Uploads from Interstellar Movie',
5680 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5681 'tags': [],
5682 'view_count': int,
5683 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5684 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5685 'channel': 'Interstellar Movie',
5686 'description': '',
5687 'modified_date': r're:\d{8}',
5688 'availability': 'public',
5689 'uploader_id': '@InterstellarMovie',
5690 'uploader': 'Interstellar Movie',
5691 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5692 },
5693 'playlist_mincount': 21,
5694 }, {
5695 'note': 'Playlist with "show unavailable videos" button',
5696 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5697 'info_dict': {
5698 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5699 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5700 'view_count': int,
5701 'channel': 'Phim Siêu Nhân Nhật Bản',
5702 'tags': [],
5703 'description': '',
5704 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5705 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5706 'modified_date': r're:\d{8}',
5707 'availability': 'public',
5708 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5709 'uploader_id': '@phimsieunhannhatban',
5710 'uploader': 'Phim Siêu Nhân Nhật Bản',
5711 },
5712 'playlist_mincount': 200,
5713 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5714 }, {
5715 'note': 'Playlist with unavailable videos in page 7',
5716 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5717 'info_dict': {
5718 'title': 'Uploads from BlankTV',
5719 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5720 'channel': 'BlankTV',
5721 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5722 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5723 'view_count': int,
5724 'tags': [],
5725 'modified_date': r're:\d{8}',
5726 'description': '',
5727 'availability': 'public',
5728 'uploader_id': '@blanktv',
5729 'uploader': 'BlankTV',
5730 'uploader_url': 'https://www.youtube.com/@blanktv',
5731 },
5732 'playlist_mincount': 1000,
5733 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5734 }, {
5735 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5736 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5737 'info_dict': {
5738 'title': 'Data Analysis with Dr Mike Pound',
5739 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5740 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5741 'tags': [],
5742 'view_count': int,
5743 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5744 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5745 'channel': 'Computerphile',
5746 'availability': 'public',
5747 'modified_date': '20190712',
5748 'uploader_id': '@Computerphile',
5749 'uploader': 'Computerphile',
5750 'uploader_url': 'https://www.youtube.com/@Computerphile',
5751 },
5752 'playlist_mincount': 11,
5753 }, {
5754 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5755 'only_matching': True,
5756 }, {
5757 'note': 'Playlist URL that does not actually serve a playlist',
5758 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5759 'info_dict': {
5760 'id': 'FqZTN594JQw',
5761 'ext': 'webm',
5762 'title': "Smiley's People 01 detective, Adventure Series, Action",
5763 'upload_date': '20150526',
5764 'license': 'Standard YouTube License',
5765 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5766 'categories': ['People & Blogs'],
5767 'tags': list,
5768 'view_count': int,
5769 'like_count': int,
5770 },
5771 'params': {
5772 'skip_download': True,
5773 },
5774 'skip': 'This video is not available.',
5775 'add_ie': [YoutubeIE.ie_key()],
5776 }, {
5777 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5778 'only_matching': True,
5779 }, {
5780 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5781 'only_matching': True,
5782 }, {
5783 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5784 'info_dict': {
5785 'id': 'hGkQjiJLjWQ', # This will keep changing
5786 'ext': 'mp4',
5787 'title': str,
5788 'upload_date': r're:\d{8}',
5789 'description': str,
5790 'categories': ['News & Politics'],
5791 'tags': list,
5792 'like_count': int,
5793 'release_timestamp': int,
5794 'channel': 'Sky News',
5795 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5796 'age_limit': 0,
5797 'view_count': int,
5798 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5799 'playable_in_embed': True,
5800 'release_date': r're:\d+',
5801 'availability': 'public',
5802 'live_status': 'is_live',
5803 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5804 'channel_follower_count': int,
5805 'concurrent_view_count': int,
5806 'uploader_url': 'https://www.youtube.com/@SkyNews',
5807 'uploader_id': '@SkyNews',
5808 'uploader': 'Sky News',
5809 'channel_is_verified': True,
5810 },
5811 'params': {
5812 'skip_download': True,
5813 },
5814 'expected_warnings': ['Ignoring subtitle tracks found in '],
5815 }, {
5816 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5817 'info_dict': {
5818 'id': 'a48o2S1cPoo',
5819 'ext': 'mp4',
5820 'title': 'The Young Turks - Live Main Show',
5821 'upload_date': '20150715',
5822 'license': 'Standard YouTube License',
5823 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5824 'categories': ['News & Politics'],
5825 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5826 'like_count': int,
5827 },
5828 'params': {
5829 'skip_download': True,
5830 },
5831 'only_matching': True,
5832 }, {
5833 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5834 'only_matching': True,
5835 }, {
5836 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5837 'only_matching': True,
5838 }, {
5839 'note': 'A channel that is not live. Should raise error',
5840 'url': 'https://www.youtube.com/user/numberphile/live',
5841 'only_matching': True,
5842 }, {
5843 'url': 'https://www.youtube.com/feed/trending',
5844 'only_matching': True,
5845 }, {
5846 'url': 'https://www.youtube.com/feed/library',
5847 'only_matching': True,
5848 }, {
5849 'url': 'https://www.youtube.com/feed/history',
5850 'only_matching': True,
5851 }, {
5852 'url': 'https://www.youtube.com/feed/subscriptions',
5853 'only_matching': True,
5854 }, {
5855 'url': 'https://www.youtube.com/feed/watch_later',
5856 'only_matching': True,
5857 }, {
5858 'note': 'Recommended - redirects to home page.',
5859 'url': 'https://www.youtube.com/feed/recommended',
5860 'only_matching': True,
5861 }, {
5862 'note': 'inline playlist with not always working continuations',
5863 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5864 'only_matching': True,
5865 }, {
5866 'url': 'https://www.youtube.com/course',
5867 'only_matching': True,
5868 }, {
5869 'url': 'https://www.youtube.com/zsecurity',
5870 'only_matching': True,
5871 }, {
5872 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5873 'only_matching': True,
5874 }, {
5875 'url': 'https://www.youtube.com/TheYoungTurks/live',
5876 'only_matching': True,
5877 }, {
5878 'url': 'https://www.youtube.com/hashtag/cctv9',
5879 'info_dict': {
5880 'id': 'cctv9',
5881 'title': '#cctv9',
5882 'tags': [],
5883 },
5884 'playlist_mincount': 300, # not consistent but should be over 300
5885 }, {
5886 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5887 'only_matching': True,
5888 }, {
5889 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5890 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5891 'only_matching': True
5892 }, {
5893 'note': '/browse/ should redirect to /channel/',
5894 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5895 'only_matching': True
5896 }, {
5897 'note': 'VLPL, should redirect to playlist?list=PL...',
5898 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5899 'info_dict': {
5900 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5901 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5902 'title': 'NCS : All Releases 💿',
5903 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
5904 'modified_date': r're:\d{8}',
5905 'view_count': int,
5906 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5907 'tags': [],
5908 'channel': 'NoCopyrightSounds',
5909 'availability': 'public',
5910 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
5911 'uploader': 'NoCopyrightSounds',
5912 'uploader_id': '@NoCopyrightSounds',
5913 },
5914 'playlist_mincount': 166,
5915 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
5916 }, {
5917 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
5918 'note': 'Topic, should redirect to playlist?list=UU...',
5919 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5920 'info_dict': {
5921 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5922 'title': 'Uploads from Royalty Free Music - Topic',
5923 'tags': [],
5924 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5925 'channel': 'Royalty Free Music - Topic',
5926 'view_count': int,
5927 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5928 'modified_date': r're:\d{8}',
5929 'description': '',
5930 'availability': 'public',
5931 'uploader': 'Royalty Free Music - Topic',
5932 },
5933 'playlist_mincount': 101,
5934 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
5935 }, {
5936 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5937 # Treat as a general feed
5938 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5939 'info_dict': {
5940 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5941 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5942 'tags': [],
5943 },
5944 'playlist_mincount': 9,
5945 }, {
5946 'note': 'Youtube music Album',
5947 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5948 'info_dict': {
5949 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5950 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5951 'tags': [],
5952 'view_count': int,
5953 'description': '',
5954 'availability': 'unlisted',
5955 'modified_date': r're:\d{8}',
5956 },
5957 'playlist_count': 50,
5958 'expected_warnings': ['YouTube Music is not directly supported'],
5959 }, {
5960 'note': 'unlisted single video playlist',
5961 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5962 'info_dict': {
5963 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5964 'title': 'yt-dlp unlisted playlist test',
5965 'availability': 'unlisted',
5966 'tags': [],
5967 'modified_date': '20220418',
5968 'channel': 'colethedj',
5969 'view_count': int,
5970 'description': '',
5971 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5972 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5973 'uploader_url': 'https://www.youtube.com/@colethedj1894',
5974 'uploader_id': '@colethedj1894',
5975 'uploader': 'colethedj',
5976 },
5977 'playlist': [{
5978 'info_dict': {
5979 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
5980 'id': 'BaW_jenozKc',
5981 '_type': 'url',
5982 'ie_key': 'Youtube',
5983 'duration': 10,
5984 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
5985 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
5986 'view_count': int,
5987 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
5988 'channel': 'Philipp Hagemeister',
5989 'uploader_id': '@PhilippHagemeister',
5990 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
5991 'uploader': 'Philipp Hagemeister',
5992 }
5993 }],
5994 'playlist_count': 1,
5995 'params': {'extract_flat': True},
5996 }, {
5997 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5998 'url': 'https://www.youtube.com/feed/recommended',
5999 'info_dict': {
6000 'id': 'recommended',
6001 'title': 'recommended',
6002 'tags': [],
6003 },
6004 'playlist_mincount': 50,
6005 'params': {
6006 'skip_download': True,
6007 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6008 },
6009 }, {
6010 'note': 'API Fallback: /videos tab, sorted by oldest first',
6011 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6012 'info_dict': {
6013 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6014 'title': 'Cody\'sLab - Videos',
6015 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6016 'channel': 'Cody\'sLab',
6017 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6018 'tags': [],
6019 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6020 'channel_follower_count': int
6021 },
6022 'playlist_mincount': 650,
6023 'params': {
6024 'skip_download': True,
6025 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6026 },
6027 'skip': 'Query for sorting no longer works',
6028 }, {
6029 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6030 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6031 'info_dict': {
6032 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6033 'title': 'Uploads from Royalty Free Music - Topic',
6034 'modified_date': r're:\d{8}',
6035 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6036 'description': '',
6037 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6038 'tags': [],
6039 'channel': 'Royalty Free Music - Topic',
6040 'view_count': int,
6041 'availability': 'public',
6042 'uploader': 'Royalty Free Music - Topic',
6043 },
6044 'playlist_mincount': 101,
6045 'params': {
6046 'skip_download': True,
6047 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6048 },
6049 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6050 }, {
6051 'note': 'non-standard redirect to regional channel',
6052 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6053 'only_matching': True
6054 }, {
6055 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6056 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6057 'info_dict': {
6058 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6059 'modified_date': '20220407',
6060 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6061 'tags': [],
6062 'availability': 'unlisted',
6063 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6064 'channel': 'pukkandan',
6065 'description': 'Test for collaborative playlist',
6066 'title': 'yt-dlp test - collaborative playlist',
6067 'view_count': int,
6068 'uploader_url': 'https://www.youtube.com/@pukkandan',
6069 'uploader_id': '@pukkandan',
6070 'uploader': 'pukkandan',
6071 },
6072 'playlist_mincount': 2
6073 }, {
6074 'note': 'translated tab name',
6075 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6076 'info_dict': {
6077 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6078 'tags': [],
6079 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6080 'description': 'test description',
6081 'title': 'cole-dlp-test-acc - 再生リスト',
6082 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6083 'channel': 'cole-dlp-test-acc',
6084 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6085 'uploader_id': '@coletdjnz',
6086 'uploader': 'cole-dlp-test-acc',
6087 },
6088 'playlist_mincount': 1,
6089 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6090 'expected_warnings': ['Preferring "ja"'],
6091 }, {
6092 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6093 'note': 'preferred lang set with playlist with translated video titles',
6094 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6095 'info_dict': {
6096 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6097 'tags': [],
6098 'view_count': int,
6099 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6100 'channel': 'cole-dlp-test-acc',
6101 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6102 'description': 'test',
6103 'title': 'dlp test playlist',
6104 'availability': 'public',
6105 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6106 'uploader_id': '@coletdjnz',
6107 'uploader': 'cole-dlp-test-acc',
6108 },
6109 'playlist_mincount': 1,
6110 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6111 'expected_warnings': ['Preferring "ja"'],
6112 }, {
6113 # shorts audio pivot for 2GtVksBMYFM.
6114 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6115 'info_dict': {
6116 'id': 'sfv_audio_pivot',
6117 'title': 'sfv_audio_pivot',
6118 'tags': [],
6119 },
6120 'playlist_mincount': 50,
6121
6122 }, {
6123 # Channel with a real live tab (not to be confused with the streams tab)
6124 # Do not treat it as if it should redirect to a live stream
6125 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6126 'info_dict': {
6127 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6128 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6129 'tags': [],
6130 },
6131 'playlist_mincount': 20,
6132 }, {
6133 # Tab name is not the same as tab id
6134 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6135 'info_dict': {
6136 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6137 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6138 'tags': [],
6139 },
6140 'playlist_mincount': 8,
6141 }, {
6142 # Home tab id is literally "home". Not to be confused with "featured"
6143 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6144 'info_dict': {
6145 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6146 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6147 'tags': [],
6148 },
6149 'playlist_mincount': 8,
6150 }, {
6151 # Should get three playlists for videos, shorts and streams tabs
6152 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6153 'info_dict': {
6154 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6155 'title': 'Polka Ch. 尾丸ポルカ',
6156 'channel_follower_count': int,
6157 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6158 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6159 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
6160 'channel': 'Polka Ch. 尾丸ポルカ',
6161 'tags': 'count:35',
6162 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6163 'uploader': 'Polka Ch. 尾丸ポルカ',
6164 'uploader_id': '@OmaruPolka',
6165 },
6166 'playlist_count': 3,
6167 }, {
6168 # Shorts tab with channel with handle
6169 # TODO: fix channel description
6170 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6171 'info_dict': {
6172 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6173 'title': 'Not Just Bikes - Shorts',
6174 'tags': 'count:12',
6175 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6176 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
6177 'channel_follower_count': int,
6178 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6179 'channel': 'Not Just Bikes',
6180 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6181 'uploader': 'Not Just Bikes',
6182 'uploader_id': '@NotJustBikes',
6183 },
6184 'playlist_mincount': 10,
6185 }, {
6186 # Streams tab
6187 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6188 'info_dict': {
6189 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6190 'title': '中村悠一 - Live',
6191 'tags': 'count:7',
6192 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6193 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6194 'channel': '中村悠一',
6195 'channel_follower_count': int,
6196 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6197 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6198 'uploader_id': '@Yuichi-Nakamura',
6199 'uploader': '中村悠一',
6200 },
6201 'playlist_mincount': 60,
6202 }, {
6203 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6204 # See test_youtube_lists
6205 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6206 'only_matching': True,
6207 }, {
6208 # No uploads and no UCID given. Should fail with no uploads error
6209 # See test_youtube_lists
6210 'url': 'https://www.youtube.com/news',
6211 'only_matching': True
6212 }, {
6213 # No videos tab but has a shorts tab
6214 'url': 'https://www.youtube.com/c/TKFShorts',
6215 'info_dict': {
6216 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6217 'title': 'Shorts Break - Shorts',
6218 'tags': 'count:48',
6219 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6220 'channel': 'Shorts Break',
6221 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6222 'channel_follower_count': int,
6223 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6224 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6225 'uploader': 'Shorts Break',
6226 'uploader_id': '@ShortsBreak_Official',
6227 },
6228 'playlist_mincount': 30,
6229 }, {
6230 # Trending Now Tab. tab id is empty
6231 'url': 'https://www.youtube.com/feed/trending',
6232 'info_dict': {
6233 'id': 'trending',
6234 'title': 'trending - Now',
6235 'tags': [],
6236 },
6237 'playlist_mincount': 30,
6238 }, {
6239 # Trending Gaming Tab. tab id is empty
6240 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6241 'info_dict': {
6242 'id': 'trending',
6243 'title': 'trending - Gaming',
6244 'tags': [],
6245 },
6246 'playlist_mincount': 30,
6247 }, {
6248 # A shorts URL results in the shorts tab
6249 # TODO: Fix channel id extraction
6250 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6251 'info_dict': {
6252 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6253 'title': 'cole-dlp-test-acc - Shorts',
6254 'channel': 'cole-dlp-test-acc',
6255 'description': 'test description',
6256 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6257 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6258 'tags': [],
6259 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6260 'uploader_id': '@coletdjnz',
6261 'uploader': 'cole-dlp-test-acc',
6262 },
6263 'playlist': [{
6264 'info_dict': {
6265 # Channel data is not currently available for short renderers (as of 2023-03-01)
6266 '_type': 'url',
6267 'ie_key': 'Youtube',
6268 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6269 'id': 'sSM9J5YH_60',
6270 'title': 'SHORT short',
6271 'view_count': int,
6272 'thumbnails': list,
6273 }
6274 }],
6275 'params': {'extract_flat': True},
6276 }, {
6277 # Live video status should be extracted
6278 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6279 'info_dict': {
6280 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6281 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6282 'tags': []
6283 },
6284 'playlist': [{
6285 'info_dict': {
6286 '_type': 'url',
6287 'ie_key': 'Youtube',
6288 'url': 'startswith:https://www.youtube.com/watch?v=',
6289 'id': str,
6290 'title': str,
6291 'live_status': 'is_live',
6292 'channel_id': str,
6293 'channel_url': str,
6294 'concurrent_view_count': int,
6295 'channel': str,
6296 'uploader': str,
6297 'uploader_url': str,
6298 'uploader_id': str,
6299 'channel_is_verified': bool, # this will keep changing
6300 }
6301 }],
6302 'params': {'extract_flat': True, 'playlist_items': '1'},
6303 'playlist_mincount': 1
6304 }, {
6305 # Channel renderer metadata. Contains number of videos on the channel
6306 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6307 'info_dict': {
6308 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6309 'title': 'cole-dlp-test-acc - Channels',
6310 'channel': 'cole-dlp-test-acc',
6311 'description': 'test description',
6312 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6313 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6314 'tags': [],
6315 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6316 'uploader_id': '@coletdjnz',
6317 'uploader': 'cole-dlp-test-acc',
6318 },
6319 'playlist': [{
6320 'info_dict': {
6321 '_type': 'url',
6322 'ie_key': 'YoutubeTab',
6323 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6324 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6325 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6326 'title': 'PewDiePie',
6327 'channel': 'PewDiePie',
6328 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6329 'thumbnails': list,
6330 'channel_follower_count': int,
6331 'playlist_count': int,
6332 'uploader': 'PewDiePie',
6333 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6334 'uploader_id': '@PewDiePie',
6335 'channel_is_verified': True,
6336 }
6337 }],
6338 'params': {'extract_flat': True},
6339 }, {
6340 'url': 'https://www.youtube.com/@3blue1brown/about',
6341 'info_dict': {
6342 'id': 'UCYO_jab_esuFRV4b17AJtAw',
6343 'tags': ['Mathematics'],
6344 'title': '3Blue1Brown - About',
6345 'channel_follower_count': int,
6346 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6347 'channel': '3Blue1Brown',
6348 'view_count': int,
6349 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6350 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
6351 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6352 'uploader_id': '@3blue1brown',
6353 'uploader': '3Blue1Brown',
6354 'channel_is_verified': True,
6355 },
6356 'playlist_count': 0,
6357 }, {
6358 # Podcasts tab, with rich entry playlistRenderers
6359 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6360 'info_dict': {
6361 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6362 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6363 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6364 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6365 'title': '99 Percent Invisible - Podcasts',
6366 'uploader': '99 Percent Invisible',
6367 'channel_follower_count': int,
6368 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6369 'tags': [],
6370 'channel': '99 Percent Invisible',
6371 'uploader_id': '@99percentinvisiblepodcast',
6372 },
6373 'playlist_count': 1,
6374 }, {
6375 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6376 'url': 'https://www.youtube.com/@AHimitsu/releases',
6377 'info_dict': {
6378 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6379 'channel': 'A Himitsu',
6380 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6381 'title': 'A Himitsu - Releases',
6382 'uploader_id': '@AHimitsu',
6383 'uploader': 'A Himitsu',
6384 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6385 'tags': 'count:16',
6386 'description': 'I make music',
6387 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6388 'channel_follower_count': int,
6389 'channel_is_verified': True,
6390 },
6391 'playlist_mincount': 10,
6392 }, {
6393 # Playlist with only shorts, shown as reel renderers
6394 # FIXME: future: YouTube currently doesn't give continuation for this,
6395 # may do in future.
6396 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6397 'info_dict': {
6398 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6399 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6400 'view_count': int,
6401 'uploader_id': '@BangyShorts',
6402 'description': '',
6403 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6404 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6405 'channel': 'Bangy Shorts',
6406 'uploader': 'Bangy Shorts',
6407 'tags': [],
6408 'availability': 'public',
6409 'modified_date': '20230626',
6410 'title': 'Uploads from Bangy Shorts',
6411 },
6412 'playlist_mincount': 100,
6413 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6414 }]
6415
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise use the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
6419
# Splits a URL into three named pieces: `pre` (the part matched by _VALID_URL),
# an optional `tab` path segment including its leading slash, and `post` (the rest).
# The conditional `(?(not_channel)|...)` only attempts to capture `tab` when the
# `not_channel` group did not take part in the match.
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL, which is not visible here - confirm
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6421
6422 def _get_url_mobj(self, url):
6423 mobj = self._URL_RE.match(url).groupdict()
6424 mobj.update((k, '') for k, v in mobj.items() if v is None)
6425 return mobj
6426
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """
    Work out the identifier and the lower-cased title of a tab renderer.

    The identifier is looked up, in order, from the tab segment of the tab's
    endpoint URL (resolved against `base_url`), from the renderer's
    `tabIdentifier`, and finally from the tab title itself.

    @param tab          tab renderer dict
    @param base_url     URL that a relative endpoint URL is joined onto
    @returns            (tab_id, tab_name) tuple; tab_name may be an empty string
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = urljoin(base_url, traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))

    identifier = None
    if endpoint_url:
        # The captured tab segment carries a leading slash; drop it
        identifier = self._get_url_mobj(endpoint_url)['tab'][1:]
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if identifier:
        renamed_ids = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return renamed_ids.get(identifier, identifier), name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')
    ids_by_name = {
        'home': 'featured',
        'live': 'streams',
    }
    return ids_by_name.get(name, name), name
6448
6449 def _has_tab(self, tabs, tab_id):
6450 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6451
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or watch-with-playlist page.

    Depending on the URL and on the downloaded page, this returns a
    redirecting url_result, the result of a single tab, a playlist whose
    entries are the results of several tabs, an inline (watch page) playlist,
    or a url_result pointing at a single video.

    `smuggled_data` is supplied by the `passthrough_smuggled_data` decorator
    (NOTE(review): presumably the data smuggled into the URL - confirm).

    Raises ExtractorError when the page cannot be handled (e.g. a requested
    tab is missing or the channel has no uploads) and UserNotLive when the
    `live` tab was requested but is not the tab selected on the page.
    """
    item_id = self._match_id(url)
    # Rewrite the host part of the URL to www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading slash; `original_tab_id` is the bare tab name ('' when no tab was given)
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs that are extracted in addition to the current page
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tab renderers: `data` may have been replaced or reset above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Prefer the video id reported by the page over the one taken from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6603
6604
class YoutubePlaylistIE(InfoExtractor):
    """
    Accepts bare playlist ids and `...?list=<id>` URLs and hands them over to
    YoutubeTabIE as a canonical https://www.youtube.com/playlist URL.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Return False for URLs that YoutubeTabIE accepts or that carry a
        non-empty `v` query parameter; otherwise use the inherited check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import; no function-local re-import is needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """
        Rebuild the input as a https://www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is kept when there is one; otherwise the
        matched playlist id is used as the `list` parameter. For music URLs
        the `is_music_url` flag is smuggled into the resulting URL.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6717
6718
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a `list` parameter into watch URLs handled by YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL (video + list) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6768
6769
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `embed/live_stream?channel=<id>` URLs onto the channel's /live URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the channel named in the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6783
6784
class YoutubeYtUserIE(InfoExtractor):
    """Expands the `ytuser:<name>` shorthand into a youtube.com/user/<name> URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL built from the matched name."""
        username = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{username}'
        return self.url_result(user_url, YoutubeTabIE, username)
6797
6798
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the `:ytfav` family of keywords to the `LL` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6816
6817
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the notification menu as a playlist of url results."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield an entry for every notification item found in `response`.

        `continuation_list` is a one-element list used as an output slot: it
        is reset to None up front and receives the last continuation renderer
        encountered while iterating, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Convert one notification renderer into a `url` type result.

        Notifications with a watch endpoint point at the video; otherwise a
        community post URL is built from the browse endpoint. Returns None
        when neither a video id nor a channel id plus post id can be found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page for as long as a page leaves a continuation behind."""
        pending = [None]  # one-slot holder filled in by _extract_notification_menu
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                pending, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, pending)
            if not pending[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are both 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
6908
6909
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearch` search key."""
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6923
6924
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearchdate` search key."""
    # The name is derived from YoutubeSearchIE's name with a ':date' suffix
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6938
6939
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com results/search page URLs, forwarding their `sp` parameter to the search."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Run the search described by the URL's query string.

        The query is read from `search_query` (or `q`), the optional `sp`
        value is passed on to `_search_results`, and the query doubles as
        both playlist id and title.
        """
        url_params = parse_qs(url)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        sp = url_params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, sp), query, query)
7009
7010
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles music.youtube.com search URLs; a section can be chosen via `sp` or the URL fragment."""
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Run a YouTube Music search for the query given in the URL.

        An explicit `sp` parameter wins and is mapped back to a section name
        when it is a known one (otherwise the raw value serves as the name).
        Without `sp`, the URL fragment is looked up in `_SECTIONS`; an unknown
        fragment means no section. The playlist id/title is the query,
        followed by ' - <section>' when a section applies.
        """
        url_params = parse_qs(url)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        params = url_params.get('sp', (None,))[0]
        if params:
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
7062
7063
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its login check is called explicitly
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name."""
        feed_name = cls._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
7082
7083
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the `:ytwatchlater` keyword to the `WL` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
7096
7097
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; also matches the bare youtube.com front page URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
7113
7114
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed, selected with the `:ytsub` family of keywords."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
7126
7127
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Accepts ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
7136
7137
class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Turns /source/<video id>/shorts URLs into the matching sfv_audio_pivot feed URL
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True},
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id

        The encoded id is inserted three times into a fixed binary template;
        the result is base64-encoded and then percent-quoted.
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        b64_payload = base64.b64encode(payload).decode()
        return urllib.parse.quote(b64_payload)

    def _real_extract(self, url):
        """Return a url_result for the audio pivot feed of the matched video id"""
        browse_params = self._generate_audio_pivot_params(self._match_id(url))
        pivot_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(pivot_url, ie=YoutubeTabIE)
7160
7161
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end before any video id is given
    # (typically because an unquoted "&" was interpreted by the shell) and
    # reports a helpful error instead of attempting extraction
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always fails: a URL matched by this extractor carries no video id

        Raises:
            ExtractorError: with expected=True, advising the user to quote the URL
        """
        # The example commands name this program (yt-dlp); the message
        # previously told users to run "youtube-dl"
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
7209
7210
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> URL to its base video plus the clip's start/end
    # offsets; the video itself is delegated to YoutubeIE (url_transparent)
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """
        Locate the base video id and the clip boundaries in the page data

        Returns:
            A url_transparent result pointing at the base video's watch URL,
            with 'id' set to the clip id and 'section_start'/'section_end'
            given in seconds (the page reports them in milliseconds).

        Raises:
            ExtractorError: if the video id or the clip start/end times
                cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # traverse_obj yields None when no loopCommand is present; fail with a
        # clear ExtractorError rather than a TypeError on subscripting None
        if not isinstance(clip_data, dict):
            raise ExtractorError('Unable to find clip section data')

        # int_or_none tolerates missing or non-numeric values, which are then
        # reported the same way as a missing loopCommand
        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
7270
7271
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Follows consent.youtube.com redirect pages to the URL given in their
    # "continue" query parameter
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """
        Hand off to the URL stored in the "continue" query parameter

        Raises:
            ExtractorError: with expected=True when the parameter is absent
                or url_or_none rejects its value.
        """
        # If the parameter is repeated, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        target_url = url_or_none(continue_values[-1])
        if target_url:
            return self.url_result(target_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7317
7318
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """
        Always fails: the matched id is too short to be a complete video id

        Raises:
            ExtractorError: with expected=True, naming the partial id and the URL
        """
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)