]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
c305a25c 5import datetime as dt
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8828f457 14import shlex
46383212 15import sys
f8271158 16import threading
8a784c74 17import time
e0df6211 18import traceback
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
3d2623a8 25from ..networking.exceptions import HTTPError, network_exceptions
4bb4a188 26from ..utils import (
f8271158 27 NO_DEFAULT,
28 ExtractorError,
4d37720a 29 LazyList,
693f0600 30 UserNotLive,
720c3099 31 bug_reports_message,
82d02080 32 classproperty,
c5e8d7af 33 clean_html,
d92f5d5a 34 datetime_from_str,
11f9be09 35 dict_get,
a25a4243 36 filesize_from_tbr,
7a32c70d 37 filter_dict,
2d30521a 38 float_or_none,
11f9be09 39 format_field,
ff91cf74 40 get_first,
dd27fd17 41 int_or_none,
641ad5d8 42 is_html,
34921b43 43 join_nonempty,
48416bc4 44 js_to_json,
94278f72 45 mimetype2ext,
11f9be09 46 orderedSet,
6310acf5 47 parse_codecs,
49bd8c66 48 parse_count,
7c80519c 49 parse_duration,
7ea65411 50 parse_iso8601,
4dfbf869 51 parse_qs,
dca3ff4a 52 qualities,
3995d37d 53 remove_start,
cf7e015f 54 smuggle_url,
dbdaaa23 55 str_or_none,
c93d53f5 56 str_to_int,
f3aa3c3f 57 strftime_or_none,
7c365c21 58 traverse_obj,
a25a4243 59 try_call,
556dbe7f 60 try_get,
c5e8d7af
PH
61 unescapeHTML,
62 unified_strdate,
f0d785d3 63 unified_timestamp,
cf7e015f 64 unsmuggle_url,
8bdd16b4 65 update_url_query,
21c340b8 66 url_or_none,
fe93e2c4 67 urljoin,
7c365c21 68 variadic,
c5e8d7af
PH
69)
70
c795c39f 71STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
962ffcf8 72# any clients starting with _ cannot be explicitly requested by the user
000c15a4 73INNERTUBE_CLIENTS = {
74 'web': {
75 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
76 'INNERTUBE_CONTEXT': {
77 'client': {
78 'clientName': 'WEB',
a0c830f4 79 'clientVersion': '2.20220801.00.00',
add96eb9 80 },
000c15a4 81 },
add96eb9 82 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
000c15a4 83 },
84 'web_embedded': {
85 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
86 'INNERTUBE_CONTEXT': {
87 'client': {
88 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 89 'clientVersion': '1.20220731.00.00',
000c15a4 90 },
91 },
add96eb9 92 'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
000c15a4 93 },
94 'web_music': {
95 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
96 'INNERTUBE_HOST': 'music.youtube.com',
97 'INNERTUBE_CONTEXT': {
98 'client': {
99 'clientName': 'WEB_REMIX',
a0c830f4 100 'clientVersion': '1.20220727.01.00',
add96eb9 101 },
000c15a4 102 },
103 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
104 },
e7e94f2a 105 'web_creator': {
18c7683d 106 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
107 'INNERTUBE_CONTEXT': {
108 'client': {
109 'clientName': 'WEB_CREATOR',
a0c830f4 110 'clientVersion': '1.20220726.00.00',
add96eb9 111 },
e7e94f2a
D
112 },
113 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
114 },
000c15a4 115 'android': {
18c7683d 116 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 117 'INNERTUBE_CONTEXT': {
118 'client': {
119 'clientName': 'ANDROID',
7aad0654 120 'clientVersion': '19.09.37',
50ac0e54 121 'androidSdkVersion': 30,
add96eb9 122 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
123 },
000c15a4 124 },
125 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
add96eb9 126 'REQUIRE_JS_PLAYER': False,
000c15a4 127 },
128 'android_embedded': {
18c7683d 129 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 130 'INNERTUBE_CONTEXT': {
131 'client': {
132 'clientName': 'ANDROID_EMBEDDED_PLAYER',
7aad0654 133 'clientVersion': '19.09.37',
50ac0e54 134 'androidSdkVersion': 30,
add96eb9 135 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
000c15a4 136 },
137 },
b6de707d 138 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
add96eb9 139 'REQUIRE_JS_PLAYER': False,
000c15a4 140 },
141 'android_music': {
18c7683d 142 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
7aad0654 146 'clientVersion': '6.42.52',
50ac0e54 147 'androidSdkVersion': 30,
add96eb9 148 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
149 },
000c15a4 150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
add96eb9 152 'REQUIRE_JS_PLAYER': False,
000c15a4 153 },
e7e94f2a 154 'android_creator': {
18c7683d 155 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
156 'INNERTUBE_CONTEXT': {
157 'client': {
158 'clientName': 'ANDROID_CREATOR',
50ac0e54 159 'clientVersion': '22.30.100',
160 'androidSdkVersion': 30,
add96eb9 161 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
e7e94f2a
D
162 },
163 },
b6de707d 164 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
add96eb9 165 'REQUIRE_JS_PLAYER': False,
e7e94f2a 166 },
18c7683d 167 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
168 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 169 'ios': {
18c7683d 170 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 171 'INNERTUBE_CONTEXT': {
172 'client': {
173 'clientName': 'IOS',
7aad0654 174 'clientVersion': '19.09.3',
18c7683d 175 'deviceModel': 'iPhone14,3',
add96eb9 176 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
177 },
000c15a4 178 },
b6de707d 179 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
add96eb9 180 'REQUIRE_JS_PLAYER': False,
000c15a4 181 },
182 'ios_embedded': {
000c15a4 183 'INNERTUBE_CONTEXT': {
184 'client': {
185 'clientName': 'IOS_MESSAGES_EXTENSION',
7aad0654 186 'clientVersion': '19.09.3',
18c7683d 187 'deviceModel': 'iPhone14,3',
add96eb9 188 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
000c15a4 189 },
190 },
b6de707d 191 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
add96eb9 192 'REQUIRE_JS_PLAYER': False,
000c15a4 193 },
194 'ios_music': {
18c7683d 195 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 196 'INNERTUBE_CONTEXT': {
197 'client': {
198 'clientName': 'IOS_MUSIC',
7aad0654 199 'clientVersion': '6.33.3',
224b5a35 200 'deviceModel': 'iPhone14,3',
add96eb9 201 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
000c15a4 202 },
203 },
b6de707d 204 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
add96eb9 205 'REQUIRE_JS_PLAYER': False,
000c15a4 206 },
e7e94f2a
D
207 'ios_creator': {
208 'INNERTUBE_CONTEXT': {
209 'client': {
210 'clientName': 'IOS_CREATOR',
224b5a35
SF
211 'clientVersion': '22.33.101',
212 'deviceModel': 'iPhone14,3',
add96eb9 213 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
e7e94f2a
D
214 },
215 },
b6de707d 216 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
add96eb9 217 'REQUIRE_JS_PLAYER': False,
e7e94f2a 218 },
3619f78d 219 # mweb has 'ultralow' formats
220 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 221 'mweb': {
18c7683d 222 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 223 'INNERTUBE_CONTEXT': {
224 'client': {
225 'clientName': 'MWEB',
a0c830f4 226 'clientVersion': '2.20220801.00.00',
add96eb9 227 },
000c15a4 228 },
add96eb9 229 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
e7870111
D
230 },
231 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
232 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
233 'tv_embedded': {
234 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
235 'INNERTUBE_CONTEXT': {
236 'client': {
237 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
238 'clientVersion': '2.0',
239 },
240 },
add96eb9 241 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
e7870111 242 },
cf212d0a
H
243 # This client has pre-merged video+audio 720p/1080p streams
244 'mediaconnect': {
245 'INNERTUBE_CONTEXT': {
246 'client': {
247 'clientName': 'MEDIA_CONNECT_FRONTEND',
248 'clientVersion': '0.1',
249 },
250 },
add96eb9 251 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
cf212d0a 252 },
000c15a4 253}
254
255
e7870111
D
256def _split_innertube_client(client_name):
257 variant, *base = client_name.rsplit('.', 1)
258 if base:
259 return variant, base[0], variant
260 base, *variant = client_name.split('_', 1)
261 return client_name, base, variant[0] if variant else None
262
263
c795c39f
L
def short_client_name(client_name):
    """Return a compact upper-case label for *client_name*.

    The full client name has ``embedscreen`` rewritten to ``e_s`` and is
    split on underscores; the label joins the first four characters of the
    leading piece with the initial of every following piece.
    """
    full_name = _split_innertube_client(client_name)[0]
    pieces = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(piece[0] for piece in pieces[1:])
    return join_nonempty(pieces[0][:4], initials).upper()
267
268
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Runs over a snapshot of the mapping, because a derived
    ``<client>_embedscreen`` entry is inserted for each client that has no
    variant. The mapping is modified in place; nothing is returned.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            ytcfg.setdefault(key, fallback)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            # Base clients keep their priority; the derived embed-screen
            # copy is ranked three below it.
            embedscreen = copy.deepcopy(ytcfg)
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
295
296
297build_innertube_clients()
298
299
class BadgeType(enum.Enum):
    """Kinds of badge distinguished by the YouTube extractors."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Status badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
c26f9b99 308
309
de7f3446 310class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 311 """Provide base functions for Youtube extractors"""
e00eb564 312
3462ffa8 313 _RESERVED_NAMES = (
08e29b9f 314 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 315 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 316 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 317 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 318
3619f78d 319 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
320
52efa4b3 321 # _NETRC_MACHINE = 'youtube'
3619f78d 322
b2e8bc1b
JMF
323 # If True it will raise an error if no login info is provided
324 _LOGIN_REQUIRED = False
325
d9190e44
RH
326 _INVIDIOUS_SITES = (
327 # invidious-redirect websites
328 r'(?:www\.)?redirect\.invidious\.io',
329 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 330 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
331 r'(?:www\.)?invidious\.pussthecat\.org',
332 r'(?:www\.)?invidious\.zee\.li',
333 r'(?:www\.)?invidious\.ethibox\.fr',
05799a48
RH
334 r'(?:www\.)?iv\.ggtyler\.dev',
335 r'(?:www\.)?inv\.vern\.i2p',
336 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
337 r'(?:www\.)?inv\.riverside\.rocks',
338 r'(?:www\.)?invidious\.silur\.me',
339 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
340 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
341 r'(?:www\.)?invidious\.slipfox\.xyz',
342 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
343 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
344 r'(?:www\.)?invidious\.tiekoetter\.com',
345 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
346 r'(?:www\.)?invidious\.nerdvpn\.de',
347 r'(?:www\.)?invidious\.weblibre\.org',
348 r'(?:www\.)?inv\.odyssey346\.dev',
349 r'(?:www\.)?invidious\.dhusch\.de',
350 r'(?:www\.)?iv\.melmac\.space',
351 r'(?:www\.)?watch\.thekitty\.zone',
352 r'(?:www\.)?invidious\.privacydev\.net',
353 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
354 r'(?:www\.)?invidious\.drivet\.xyz',
355 r'(?:www\.)?vid\.priv\.au',
356 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
357 r'(?:www\.)?inv\.vern\.cc',
358 r'(?:www\.)?invidious\.esmailelbob\.xyz',
359 r'(?:www\.)?invidious\.sethforprivacy\.com',
360 r'(?:www\.)?yt\.oelrichsgarcia\.de',
361 r'(?:www\.)?yt\.artemislena\.eu',
362 r'(?:www\.)?invidious\.flokinet\.to',
363 r'(?:www\.)?invidious\.baczek\.me',
364 r'(?:www\.)?y\.com\.sb',
365 r'(?:www\.)?invidious\.epicsite\.xyz',
366 r'(?:www\.)?invidious\.lidarshield\.cloud',
367 r'(?:www\.)?yt\.funami\.tech',
d9190e44 368 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
369 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
370 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
371 # youtube-dl invidious instances list
372 r'(?:(?:www|no)\.)?invidiou\.sh',
373 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
374 r'(?:www\.)?invidious\.kabi\.tk',
375 r'(?:www\.)?invidious\.mastodon\.host',
376 r'(?:www\.)?invidious\.zapashcanon\.fr',
377 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
378 r'(?:www\.)?invidious\.tinfoil-hat\.net',
379 r'(?:www\.)?invidious\.himiko\.cloud',
380 r'(?:www\.)?invidious\.reallyancient\.tech',
381 r'(?:www\.)?invidious\.tube',
382 r'(?:www\.)?invidiou\.site',
383 r'(?:www\.)?invidious\.site',
384 r'(?:www\.)?invidious\.xyz',
385 r'(?:www\.)?invidious\.nixnet\.xyz',
386 r'(?:www\.)?invidious\.048596\.xyz',
387 r'(?:www\.)?invidious\.drycat\.fr',
388 r'(?:www\.)?inv\.skyn3t\.in',
389 r'(?:www\.)?tube\.poal\.co',
390 r'(?:www\.)?tube\.connect\.cafe',
391 r'(?:www\.)?vid\.wxzm\.sx',
392 r'(?:www\.)?vid\.mint\.lgbt',
393 r'(?:www\.)?vid\.puffyan\.us',
394 r'(?:www\.)?yewtu\.be',
395 r'(?:www\.)?yt\.elukerio\.org',
396 r'(?:www\.)?yt\.lelux\.fi',
397 r'(?:www\.)?invidious\.ggc-project\.de',
398 r'(?:www\.)?yt\.maisputain\.ovh',
399 r'(?:www\.)?ytprivate\.com',
400 r'(?:www\.)?invidious\.13ad\.de',
401 r'(?:www\.)?invidious\.toot\.koeln',
402 r'(?:www\.)?invidious\.fdn\.fr',
403 r'(?:www\.)?watch\.nettohikari\.com',
404 r'(?:www\.)?invidious\.namazso\.eu',
405 r'(?:www\.)?invidious\.silkky\.cloud',
406 r'(?:www\.)?invidious\.exonip\.de',
407 r'(?:www\.)?invidious\.riverside\.rocks',
408 r'(?:www\.)?invidious\.blamefran\.net',
409 r'(?:www\.)?invidious\.moomoo\.de',
410 r'(?:www\.)?ytb\.trom\.tf',
411 r'(?:www\.)?yt\.cyberhost\.uk',
412 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
413 r'(?:www\.)?qklhadlycap4cnod\.onion',
414 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
415 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
416 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
417 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
418 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
419 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
420 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
421 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
422 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
423 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
424 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
425 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 426 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 427 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 428 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
429 r'(?:www\.)?watch\.whatever\.social',
430 r'(?:www\.)?piped\.garudalinux\.org',
431 r'(?:www\.)?piped\.rivo\.lol',
432 r'(?:www\.)?piped-libre\.kavin\.rocks',
433 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 434 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
435 r'(?:www\.)?il\.ax',
436 r'(?:www\.)?piped\.esmailelbob\.xyz',
437 r'(?:www\.)?piped\.projectsegfau\.lt',
438 r'(?:www\.)?piped\.privacydev\.net',
439 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
440 r'(?:www\.)?piped\.smnz\.de',
441 r'(?:www\.)?piped\.adminforge\.de',
442 r'(?:www\.)?watch\.whatevertinfoil\.de',
443 r'(?:www\.)?piped\.qdi\.fi',
6a9c7a2b 444 r'(?:(?:www|cf)\.)?piped\.video',
bc87dac7 445 r'(?:www\.)?piped\.aeong\.one',
05799a48
RH
446 r'(?:www\.)?piped\.moomoo\.me',
447 r'(?:www\.)?piped\.chauvet\.pro',
448 r'(?:www\.)?watch\.leptons\.xyz',
449 r'(?:www\.)?pd\.vern\.cc',
450 r'(?:www\.)?piped\.hostux\.net',
451 r'(?:www\.)?piped\.lunar\.icu',
78a78fa7
BG
452 # Hyperpipe instances from https://hyperpipe.codeberg.page/
453 r'(?:www\.)?hyperpipe\.surge\.sh',
454 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
455 r'(?:www\.)?listen\.whatever\.social',
456 r'(?:www\.)?music\.adminforge\.de',
d9190e44
RH
457 )
458
c26f9b99 459 # extracted from account/account_menu ep
460 # XXX: These are the supported YouTube UI and API languages,
461 # which is slightly different from languages supported for translation in YouTube studio
462 _SUPPORTED_LANG_CODES = [
463 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
464 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
465 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
466 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
467 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
add96eb9 468 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
c26f9b99 469 ]
470
a057779d 471 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
472
7666b936 473 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
474 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
475
def ucid_or_none(self, ucid):
    """Return *ucid* if the whole string matches `_YT_CHANNEL_UCID_RE`, else None."""
    pattern = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(pattern, ucid, 'UC-id', default=None)
478
def handle_or_none(self, handle):
    """Return *handle* if the whole string matches `_YT_HANDLE_RE`, else None."""
    pattern = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(pattern, handle, '@-handle', default=None)
481
def handle_from_url(self, url):
    """Return the @-handle that starts the path of *url*, or None.

    *url* may be an absolute youtube.com URL or a bare path starting with ``/``.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(pattern, url, 'channel handle', default=None)
485
def ucid_from_url(self, url):
    """Return the UC channel id that starts the path of *url*, or None.

    *url* may be an absolute youtube.com URL or a bare path starting with ``/``.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(pattern, url, 'channel id', default=None)
489
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the ``lang`` extractor argument.

    Evaluates to None when no language was requested.
    Raises ExtractorError for a code missing from `_SUPPORTED_LANG_CODES`
    and emits a warning for any supported code other than "en".
    """
    configured = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    preferred_lang = configured[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
507
def _initialize_consent(self):
    """Set the ``SOCS`` consent cookie to ``CAI`` unless one is already usable.

    Nothing is changed when a ``__Secure-3PSID`` cookie is present, or when
    an existing ``SOCS`` cookie has a value that does not start with ``CAA``.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    socs_cookie = yt_cookies.get('SOCS')
    # NOTE(review): a "CAA" prefix presumably marks the not-yet-consented state - confirm
    needs_consent = not socs_cookie or socs_cookie.value.startswith('CAA')
    if needs_consent:
        # accept all (required for mixes)
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)
8d81f3e3 516
def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl`` and ``tz`` are pinned.

    Existing key/value pairs of the cookie are kept when it parses; a
    parse failure is reported as a warning and the cookie is rebuilt
    from scratch.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    encoded_pref = urllib.parse.urlencode(pref)
    self._set_cookie('.youtube.com', name='PREF', value=encoded_pref)
f3aa3c3f 528
def _real_initialize(self):
    """Run the initialisation steps: PREF cookie, consent cookie, login check."""
    steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in steps:
        step()
533
def _check_login_required(self):
    """Call `raise_login_required` when `_LOGIN_REQUIRED` is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED:
        return
    if self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 537
b7c47b74 538 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
539 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 540
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in ytcfg for *client* (safe for the caller to mutate)."""
    builtin_ytcfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_ytcfg)
109dd3b2 543
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` value of the built-in config for *client*."""
    builtin_ytcfg = INNERTUBE_CLIENTS[client]
    return builtin_ytcfg['INNERTUBE_HOST']
109dd3b2 546
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """`try_get` on *ytcfg*, falling back to the default ytcfg of *default_client*.

    The fallback is used whenever the value found in *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
551
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from *ytcfg*, or from the default config of *default_client*.

    ``INNERTUBE_CLIENT_NAME`` is tried before the name inside ``INNERTUBE_CONTEXT``.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 556
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg*, or from the default config of *default_client*.

    ``INNERTUBE_CLIENT_VERSION`` is tried before the version inside ``INNERTUBE_CONTEXT``.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 561
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname to talk to.

    Order of precedence: the ``innertube_host`` extractor argument, then
    *req_api_hostname*, then the built-in host of *default_client*
    (``'web'`` when that is not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
565
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from *ytcfg*, or from the default config of *default_client*."""
    api_key_getter = lambda x: x['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, api_key_getter, str, default_client)
109dd3b2 568
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone enforced.

    The context comes from *ytcfg* when it provides one, otherwise from the
    default config of *default_client*. The ``client`` sub-dict is updated
    in place, so a context taken from *ytcfg* is modified for the caller too.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    enforced = {'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    client_context.update(enforced)
    return context
576
cf87314d 577 _SAPISID = None
578
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` Authorization header value for *origin*.

    The SAPISID value is looked up from the cookies on first use and stored
    on ``self._SAPISID`` (``False`` when no usable cookie exists).
    Returns None when there is no SAPISID to sign with.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(digest_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
603
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST *query* to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    The request body is *query* merged over a ``context`` entry (the given
    *context*, or the one extracted for *default_client*). *headers* are
    applied on top of the generated API headers. The ``innertube_key``
    extractor argument takes precedence over *api_key*, which in turn takes
    precedence over the key of *default_client*.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint_url,
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 621
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search *webpage* for the JSON assigned to ``ytInitialData`` and return it."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 624
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` among *data* that converts to an
    int, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
635
636 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from *ytcfg* (``ID_TOKEN``) or, failing that, from *webpage*.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
646
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Each argument is inspected in order; the first hit is returned.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        channel_syncid, *remainder = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if remainder and remainder[0]:
            return channel_syncid
a1c5d2ca 665
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
ac56cf38 675
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; the result is cached."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
679
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the ``ytcfg.set({...});`` call found in *webpage*.

    Returns an empty dict when *webpage* is empty, nothing matches, or the
    match does not parse.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
687
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an API request.

    Explicit keyword arguments win over values found in *ytcfg*, which in
    turn fall back to the defaults of *default_client* where the helpers
    support it. The result is passed through `filter_dict` before being
    returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    headers = {}
    headers['X-YouTube-Client-Name'] = str(self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client))
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)
    headers['User-Agent'] = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A sync id without a known session index falls back to index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 713
def _download_ytcfg(self, client, video_id):
    """Download the page of a web-based *client* and return the ytcfg found in it.

    Only ``web``, ``web_music`` and ``web_embedded`` have a page to fetch;
    any other client, or a failed download, yields an empty dict.
    """
    config_urls = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    url = config_urls.get(client)
    if not url:
        return {}

    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
725
2d6659b9 726 @staticmethod
727 def _build_api_continuation_query(continuation, ctp=None):
728 query = {
add96eb9 729 'continuation': continuation,
2d6659b9 730 }
731 # TODO: Inconsistency with clickTrackingParams.
732 # Currently we have a fixed ctp contained within context (from ytcfg)
733 # and a ctp in root query for continuation.
734 if ctp:
735 query['clickTracking'] = {'clickTrackingParams': ctp}
736 return query
737
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of *renderer*.

    Returns None when the renderer carries no such data or the data has no
    ``continuation`` token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    next_continuation = try_get(renderer, data_getters, dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))
2d6659b9 750
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    The token is read from `continuationCommand.token`.

    @returns the continuation query dict, or None when the endpoint is
             not a dict or carries no string token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 760
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query for a renderer.

    The next/reload continuation data is tried first; otherwise the
    `continuationItemRenderer` entries under 'contents', 'items' or 'rows'
    are searched for a usable endpoint.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return (
        cls._extract_next_continuation_data(renderer)
        or traverse_obj(
            renderer, endpoint_path,
            get_all=False, expected_type=cls._extract_continuation_ep_data))
2d6659b9 771
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts, that have no 'type', or whose 'text'
    yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no .get(); skip it instead of crashing
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
784
c0ac49bc 785 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 786 errors, warnings = [], []
109dd3b2 787 for alert_type, alert_message in alerts:
641ad5d8 788 if alert_type.lower() == 'error' and fatal:
109dd3b2 789 errors.append([alert_type, alert_message])
a057779d 790 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 791 warnings.append([alert_type, alert_message])
792
793 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 794 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 795 if errors:
add96eb9 796 raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
109dd3b2 797
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to _report_alerts along with any extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
800
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.

    Each renderer is classified by its icon type, then by its style and,
    as a last resort, by matching known English words in its label text.

    @param badge_list: list of objects holding `...BadgeRenderer` entries
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    badges = []
    for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
        # Both lookups are restricted to str so that an unexpected
        # (e.g. unhashable) value cannot make the dict lookup raise
        badge_type = (
            icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='')
        lowered_label = label.lower()
        for match, label_badge_type in label_map.items():
            # First matching entry wins
            if match in lowered_label:
                badges.append({'type': label_badge_type})
                break

    return badges
852
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has the given 'type', else False."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)
856
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at any of the given paths.

    A text object is read from its 'simpleText' string, or else by joining
    the 'text' of its 'runs' (a bare list is treated as the runs themselves).

    @param path_list: paths to try in order; with no path, `data` itself is used
    @param max_runs: if truthy, only this many leading runs are joined
    @returns the text, or None if nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            # presumably a non-branching path yields a single object rather
            # than a list of matches, hence the wrapping
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
47193e02 878
def _get_count(self, data, *path_list):
    """
    Parse a count out of the text found in `data` at the given paths.

    The text is first given to parse_count; if that yields None, the
    leading run of digits and commas (whitespace removed) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count

    compact_text = re.sub(r'\s', '', count_text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)
886
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict

    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries that are not
             dicts or have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            # A non-dict entry has no .get(); skip it instead of crashing
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
910
f3aa3c3f 911 @staticmethod
912 def extract_relative_time(relative_time_text):
913 """
914 Extracts a relative time from string and converts to dt object
2fb35f60 915 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
f3aa3c3f 916 """
2fb35f60 917
5ca095cb 918 # XXX: this could be moved to a general function in utils/_utils.py
2fb35f60 919 # The relative time text strings are roughly the same as what
920 # Javascript's Intl.RelativeTimeFormat function generates.
921 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
922 mobj = re.search(
923 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
924 relative_time_text)
f3aa3c3f 925 if mobj:
f0d785d3 926 start = mobj.group('start')
927 if start:
928 return datetime_from_str(start)
f3aa3c3f 929 try:
add96eb9 930 return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
f3aa3c3f 931 except ValueError:
932 return None
933
c26f9b99 934 def _parse_time_text(self, text):
935 if not text:
936 return
c305a25c 937 dt_ = self.extract_relative_time(text)
f3aa3c3f 938 timestamp = None
c305a25c 939 if isinstance(dt_, dt.datetime):
940 timestamp = calendar.timegm(dt_.timetuple())
f0d785d3 941
942 if timestamp is None:
943 timestamp = (
944 unified_timestamp(text) or unified_timestamp(
945 self._search_regex(
17322130 946 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 947 text.lower(), 'time text', default=None)))
f0d785d3 948
c26f9b99 949 if text and timestamp is None and self._preferred_lang in (None, 'en'):
950 self.report_warning(
951 f'Cannot parse localized time text "{text}"', only_once=True)
952 return timestamp
f3aa3c3f 953
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on errors.

    Two retry managers are used: one for errors (network errors other than
    HTTP 403/429, and "unknown error" alerts) and one for incomplete data,
    i.e. a response in which `check_get_keys` yields nothing.

    @returns the response; None once the incomplete-data retries stop;
             otherwise the result of _error_or_warning for errors that
             are not retried
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)

    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, HTTPError):
                main_rm.error = err
                next(main_retries)
                continue

            # Report the error message from a non-HTML error body, if any
            first_bytes = cause.response.read(512)
            if not is_html(first_bytes):
                error_body = self._webpage_read_content(
                    cause.response, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.status not in (403, 429):
                main_rm.error = err
                next(main_retries)
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                main_rm.error = err
                next(main_retries)
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response

        icd_rm.error = ExtractorError('Incomplete data received', expected=True)
        should_retry = next(icd_retries, None)
        if not should_retry:
            return None
109dd3b2 1020
9297939e 1021 @staticmethod
1022 def is_music_url(url):
5b28cef7 1023 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
9297939e 1024
def _extract_video(self, renderer):
    """
    Build a 'url' type result for a video renderer.

    Metadata is read from the renderer itself, falling back to the reel
    player header renderer nested in its navigation endpoint for the
    title, channel and time text.

    @param renderer: video renderer dict
    @returns info dict pointing to the watch (or shorts) URL of the video
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = self.ucid_or_none(
        traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
            expected_type=str, get_all=False)
        or traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # Live and upcoming videos get the count under 'concurrent_view_count'
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The time text is only parsed when the 'approximate_date' argument is set
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        # True when verified, None (rather than False) otherwise
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1119
0c148415 1120
360e1ca5 1121class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1122 IE_DESC = 'YouTube'
add96eb9 1123 _VALID_URL = r'''(?x)^
c5e8d7af 1124 (
edb53e2d 1125 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1126 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1127 (?:www\.)?deturl\.com/www\.youtube\.com|
1128 (?:www\.)?pwnyoutube\.com|
1129 (?:www\.)?hooktube\.com|
1130 (?:www\.)?yourepeat\.com|
1131 tube\.majestyc\.net|
add96eb9 1132 {invidious}|
bc2ca1bb 1133 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1134 (?:.*?\#/)? # handle anchor (#/) redirect urls
1135 (?: # the various things that can precede the ID:
dad2210c 1136 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1137 |(?: # or the v= param in all its forms
f7000f3a 1138 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1139 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1140 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1141 v=
1142 )
f4b05232 1143 ))
cbaed4bb
S
1144 |(?:
1145 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1146 vid\.plus| # or vid.plus/xxxx
1147 zwearz\.com/watch| # or zwearz.com/watch/xxxx
add96eb9 1148 {invidious}
cbaed4bb 1149 )/
edb53e2d 1150 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1151 )
c5e8d7af 1152 )? # all until now is optional -> you can pass the naked ID
add96eb9 1153 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
c5e8d7af 1154 (?(1).+)? # if we found the ID, everything can follow
add96eb9 1155 (?:\#|$)'''.format(
1156 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1157 )
7c6eb424 1158 _EMBED_REGEX = [
1159 r'''(?x)
1160 (?:
0ca0f881 1161 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1162 data-video-url=|
1163 <embed[^>]+?src=|
1164 embedSWF\(?:\s*|
1165 <object[^>]+data=|
1166 new\s+SWFObject\(
1167 )
1168 (["\'])
1169 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1170 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1171 \1''',
1172 # https://wordpress.org/plugins/lazy-load-for-videos/
1173 r'''(?xs)
1174 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1175 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1176 ]
6368e2e6 1177 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1178
e40c758c 1179 _PLAYER_INFO_RE = (
cc2db878 1180 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1181 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1182 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1183 )
85ec2a33 1184 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
c2d3cb4c 1185 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1186 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1187 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1188 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1189 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1190 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1191 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1192 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1193 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1194 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1195 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1196 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1197 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1198 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1199 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1200 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1201 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1202 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1203
1204
1205 # 3D videos
c2d3cb4c 1206 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1207 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1208 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1209 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1210 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1211 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1212 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1213
96fb5605 1214 # Apple HTTP Live Streaming
11f12195 1215 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1216 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1217 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1218 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1219 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1220 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1221 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1222 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1223
1224 # DASH mp4 video
d23028a8
S
1225 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1226 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1227 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1228 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1229 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1230 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1231 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1232 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1233 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1234 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1235 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1236 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1237
f6f1fc92 1238 # Dash mp4 audio
d23028a8
S
1239 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1240 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1241 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1242 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1243 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1244 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1245 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1246
1247 # Dash webm
d23028a8
S
1248 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1249 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1250 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1251 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1252 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1253 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1254 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1255 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1256 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1257 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1258 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1260 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1261 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1262 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1263 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1264 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1265 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1266 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1267 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1268 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1269 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1270
1271 # Dash webm audio
d23028a8
S
1272 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1273 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1274
0857baad 1275 # Dash webm audio with opus inside
d23028a8
S
1276 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1277 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1278 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1279
ce6b9a2d
PH
1280 # RTMP (unnamed)
1281 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1282
1283 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1284 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1285 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1286 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1287 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1288 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1289 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1290 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1291 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1292 }
29f7c58a 1293 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1294
fd5c4aab
S
1295 _GEO_BYPASS = False
1296
78caa52a 1297 IE_NAME = 'youtube'
2eb88d95
PH
1298 _TESTS = [
1299 {
2d3d2997 1300 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1301 'info_dict': {
1302 'id': 'BaW_jenozKc',
1303 'ext': 'mp4',
3867038a 1304 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
ff9f925b 1305 'channel': 'Philipp Hagemeister',
dd4c4492
S
1306 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1307 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1308 'upload_date': '20121002',
ff9f925b 1309 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1310 'categories': ['Science & Technology'],
3867038a 1311 'tags': ['youtube-dl'],
556dbe7f 1312 'duration': 10,
dbdaaa23 1313 'view_count': int,
3e7c1224 1314 'like_count': int,
ff9f925b 1315 'availability': 'public',
1316 'playable_in_embed': True,
1317 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1318 'live_status': 'not_live',
1319 'age_limit': 0,
7c80519c 1320 'start_time': 1,
297a564b 1321 'end_time': 9,
12a1b225 1322 'comment_count': int,
7666b936 1323 'channel_follower_count': int,
1324 'uploader': 'Philipp Hagemeister',
1325 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1326 'uploader_id': '@PhilippHagemeister',
5caf30db 1327 'heatmap': 'count:100',
96a134de 1328 'timestamp': 1349198244,
add96eb9 1329 },
0e853ca4 1330 },
fccd3771 1331 {
4bc3a23e
PH
1332 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1333 'note': 'Embed-only video (#1746)',
1334 'info_dict': {
1335 'id': 'yZIXLfi8CZQ',
1336 'ext': 'mp4',
1337 'upload_date': '20120608',
1338 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1339 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
94bfcd23 1340 'age_limit': 18,
545cc85d 1341 },
1342 'skip': 'Private video',
fccd3771 1343 },
11b56058 1344 {
8bdd16b4 1345 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1346 'note': 'Use the first video ID in the URL',
1347 'info_dict': {
1348 'id': 'BaW_jenozKc',
1349 'ext': 'mp4',
3867038a 1350 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976ae3ea 1351 'channel': 'Philipp Hagemeister',
1352 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1353 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1354 'upload_date': '20121002',
976ae3ea 1355 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1356 'categories': ['Science & Technology'],
3867038a 1357 'tags': ['youtube-dl'],
556dbe7f 1358 'duration': 10,
dbdaaa23 1359 'view_count': int,
11b56058 1360 'like_count': int,
976ae3ea 1361 'availability': 'public',
1362 'playable_in_embed': True,
1363 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1364 'live_status': 'not_live',
1365 'age_limit': 0,
12a1b225 1366 'comment_count': int,
7666b936 1367 'channel_follower_count': int,
1368 'uploader': 'Philipp Hagemeister',
1369 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1370 'uploader_id': '@PhilippHagemeister',
14a14335 1371 'heatmap': 'count:100',
96a134de 1372 'timestamp': 1349198244,
34a7de29
S
1373 },
1374 'params': {
1375 'skip_download': True,
1376 },
11b56058 1377 },
dd27fd17 1378 {
2d3d2997 1379 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1380 'note': '256k DASH audio (format 141) via DASH manifest',
1381 'info_dict': {
1382 'id': 'a9LDPn-MO4I',
1383 'ext': 'm4a',
1384 'upload_date': '20121002',
4bc3a23e 1385 'description': '',
add96eb9 1386 'title': 'UHDTV TEST 8K VIDEO.mp4',
4919603f 1387 },
4bc3a23e
PH
1388 'params': {
1389 'youtube_include_dash_manifest': True,
1390 'format': '141',
4919603f 1391 },
de3c7fe0 1392 'skip': 'format 141 not served anymore',
dd27fd17 1393 },
8bdd16b4 1394 # DASH manifest with encrypted signature
1395 {
1396 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1397 'info_dict': {
1398 'id': 'IB3lcPjvWLA',
1399 'ext': 'm4a',
1400 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1401 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1402 'duration': 244,
8bdd16b4 1403 'upload_date': '20131011',
cc2db878 1404 'abr': 129.495,
976ae3ea 1405 'like_count': int,
1406 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1407 'playable_in_embed': True,
1408 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1409 'view_count': int,
1410 'track': 'The Spark',
1411 'live_status': 'not_live',
1412 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1413 'channel': 'Afrojack',
976ae3ea 1414 'tags': 'count:19',
1415 'availability': 'public',
1416 'categories': ['Music'],
1417 'age_limit': 0,
1418 'alt_title': 'The Spark',
7666b936 1419 'channel_follower_count': int,
1420 'uploader': 'Afrojack',
1421 'uploader_url': 'https://www.youtube.com/@Afrojack',
1422 'uploader_id': '@Afrojack',
8bdd16b4 1423 },
1424 'params': {
1425 'youtube_include_dash_manifest': True,
1426 'format': '141/bestaudio[ext=m4a]',
1427 },
1428 },
65c2fde2 1429 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1430 {
65c2fde2 1431 'note': 'Embed allowed age-gate video',
2d3d2997 1432 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1433 'info_dict': {
1434 'id': 'HtVdAasjOgU',
1435 'ext': 'mp4',
1436 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1437 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1438 'duration': 142,
c522adb1 1439 'upload_date': '20140605',
34952f09 1440 'age_limit': 18,
976ae3ea 1441 'categories': ['Gaming'],
1442 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1443 'availability': 'needs_auth',
1444 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1445 'like_count': int,
1446 'channel': 'The Witcher',
1447 'live_status': 'not_live',
1448 'tags': 'count:17',
1449 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1450 'playable_in_embed': True,
1451 'view_count': int,
7666b936 1452 'channel_follower_count': int,
1453 'uploader': 'The Witcher',
1454 'uploader_url': 'https://www.youtube.com/@thewitcher',
1455 'uploader_id': '@thewitcher',
14a14335 1456 'comment_count': int,
8213ce28 1457 'channel_is_verified': True,
14a14335 1458 'heatmap': 'count:100',
96a134de 1459 'timestamp': 1401991663,
c522adb1
JMF
1460 },
1461 },
65c2fde2 1462 {
1463 'note': 'Age-gate video with embed allowed in public site',
1464 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1465 'info_dict': {
1466 'id': 'HsUATh_Nc2U',
1467 'ext': 'mp4',
1468 'title': 'Godzilla 2 (Official Video)',
1469 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1470 'upload_date': '20200408',
65c2fde2 1471 'age_limit': 18,
976ae3ea 1472 'availability': 'needs_auth',
1473 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
976ae3ea 1474 'channel': 'FlyingKitty',
1475 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1476 'view_count': int,
1477 'categories': ['Entertainment'],
1478 'live_status': 'not_live',
1479 'tags': ['Flyingkitty', 'godzilla 2'],
1480 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1481 'like_count': int,
1482 'duration': 177,
1483 'playable_in_embed': True,
7666b936 1484 'channel_follower_count': int,
1485 'uploader': 'FlyingKitty',
1486 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1487 'uploader_id': '@FlyingKitty900',
5caf30db 1488 'comment_count': int,
8213ce28 1489 'channel_is_verified': True,
65c2fde2 1490 },
1491 },
1492 {
1493 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1494 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1495 'info_dict': {
1496 'id': 'Tq92D6wQ1mg',
1497 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1498 'ext': 'mp4',
17322130 1499 'upload_date': '20191228',
65c2fde2 1500 'description': 'md5:17eccca93a786d51bc67646756894066',
1501 'age_limit': 18,
976ae3ea 1502 'like_count': int,
1503 'availability': 'needs_auth',
976ae3ea 1504 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1505 'view_count': int,
1506 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1507 'channel': 'Projekt Melody',
1508 'live_status': 'not_live',
1509 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1510 'playable_in_embed': True,
1511 'categories': ['Entertainment'],
1512 'duration': 106,
1513 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1514 'comment_count': int,
7666b936 1515 'channel_follower_count': int,
1516 'uploader': 'Projekt Melody',
1517 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1518 'uploader_id': '@ProjektMelody',
96a134de 1519 'timestamp': 1577508724,
65c2fde2 1520 },
1521 },
1522 {
1523 'note': 'Non-Agegated non-embeddable video',
1524 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1525 'info_dict': {
1526 'id': 'MeJVWBSsPAY',
1527 'ext': 'mp4',
1528 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
65c2fde2 1529 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1530 'upload_date': '20130730',
976ae3ea 1531 'track': 'Such mich find mich',
1532 'age_limit': 0,
1533 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1534 'like_count': int,
1535 'playable_in_embed': False,
1536 'creator': 'OOMPH!',
1537 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1538 'view_count': int,
1539 'alt_title': 'Such mich find mich',
1540 'duration': 210,
1541 'channel': 'Herr Lurik',
1542 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1543 'categories': ['Music'],
1544 'availability': 'public',
976ae3ea 1545 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1546 'live_status': 'not_live',
1547 'artist': 'OOMPH!',
7666b936 1548 'channel_follower_count': int,
1549 'uploader': 'Herr Lurik',
1550 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1551 'uploader_id': '@HerrLurik',
65c2fde2 1552 },
1553 },
1554 {
1555 'note': 'Non-bypassable age-gated video',
1556 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1557 'only_matching': True,
1558 },
8bdd16b4 1559 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1560 # YouTube Red ad is not captured for creator
1561 {
1562 'url': '__2ABJjxzNo',
1563 'info_dict': {
1564 'id': '__2ABJjxzNo',
1565 'ext': 'mp4',
1566 'duration': 266,
1567 'upload_date': '20100430',
545cc85d 1568 'creator': 'deadmau5',
1569 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1570 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1571 'alt_title': 'Some Chords',
976ae3ea 1572 'availability': 'public',
1573 'tags': 'count:14',
1574 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1575 'view_count': int,
1576 'live_status': 'not_live',
1577 'channel': 'deadmau5',
1578 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1579 'like_count': int,
1580 'track': 'Some Chords',
1581 'artist': 'deadmau5',
1582 'playable_in_embed': True,
1583 'age_limit': 0,
1584 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1585 'categories': ['Music'],
1586 'album': 'Some Chords',
7666b936 1587 'channel_follower_count': int,
1588 'uploader': 'deadmau5',
1589 'uploader_url': 'https://www.youtube.com/@deadmau5',
1590 'uploader_id': '@deadmau5',
8bdd16b4 1591 },
1592 'expected_warnings': [
1593 'DASH manifest missing',
add96eb9 1594 ],
8bdd16b4 1595 },
067aa17e 1596 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1597 {
1598 'url': 'lqQg6PlCWgI',
1599 'info_dict': {
1600 'id': 'lqQg6PlCWgI',
1601 'ext': 'mp4',
556dbe7f 1602 'duration': 6085,
90227264 1603 'upload_date': '20150827',
12a1b225 1604 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
cbe2bd91 1605 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1606 'like_count': int,
1607 'release_timestamp': 1343767800,
1608 'playable_in_embed': True,
1609 'categories': ['Sports'],
1610 'release_date': '20120731',
1611 'channel': 'Olympics',
1612 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1613 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1614 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1615 'age_limit': 0,
1616 'availability': 'public',
1617 'live_status': 'was_live',
1618 'view_count': int,
1619 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
7666b936 1620 'channel_follower_count': int,
1621 'uploader': 'Olympics',
1622 'uploader_url': 'https://www.youtube.com/@Olympics',
1623 'uploader_id': '@Olympics',
8213ce28 1624 'channel_is_verified': True,
96a134de 1625 'timestamp': 1440707674,
cbe2bd91
PH
1626 },
1627 'params': {
1628 'skip_download': 'requires avconv',
add96eb9 1629 },
cbe2bd91 1630 },
6271f1ca
PH
1631 # Non-square pixels
1632 {
1633 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1634 'info_dict': {
1635 'id': '_b-2C3KPAM0',
1636 'ext': 'mp4',
1637 'stretched_ratio': 16 / 9.,
556dbe7f 1638 'duration': 85,
6271f1ca 1639 'upload_date': '20110310',
6271f1ca 1640 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
6271f1ca 1641 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1642 'playable_in_embed': True,
1643 'channel': '孫ᄋᄅ',
1644 'age_limit': 0,
1645 'tags': 'count:11',
1646 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1647 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1648 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1649 'view_count': int,
1650 'categories': ['People & Blogs'],
1651 'like_count': int,
1652 'live_status': 'not_live',
1653 'availability': 'unlisted',
12a1b225 1654 'comment_count': int,
7666b936 1655 'channel_follower_count': int,
1656 'uploader': '孫ᄋᄅ',
1657 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1658 'uploader_id': '@AllenMeow',
96a134de 1659 'timestamp': 1299776999,
6271f1ca 1660 },
06b491eb
S
1661 },
1662 # url_encoded_fmt_stream_map is empty string
1663 {
1664 'url': 'qEJwOuvDf7I',
1665 'info_dict': {
1666 'id': 'qEJwOuvDf7I',
f57b7835 1667 'ext': 'webm',
06b491eb
S
1668 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1669 'description': '',
1670 'upload_date': '20150404',
06b491eb
S
1671 },
1672 'params': {
1673 'skip_download': 'requires avconv',
e323cf3f
S
1674 },
1675 'skip': 'This live event has ended.',
06b491eb 1676 },
067aa17e 1677 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1678 {
1679 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1680 'info_dict': {
1681 'id': 'FIl7x6_3R5Y',
eb6793ba 1682 'ext': 'webm',
da77d856
S
1683 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1684 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1685 'duration': 220,
da77d856 1686 'upload_date': '20150625',
eb6793ba 1687 'formats': 'mincount:31',
da77d856 1688 },
eb6793ba 1689 'skip': 'not actual anymore',
2ee8f5d8 1690 },
8a1a26ce
YCH
1691 # DASH manifest with segment_list
1692 {
1693 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1694 'md5': '8ce563a1d667b599d21064e982ab9e31',
1695 'info_dict': {
1696 'id': 'CsmdDsKjzN8',
1697 'ext': 'mp4',
17ee98e1 1698 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce 1699 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
8a1a26ce
YCH
1700 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1701 },
1702 'params': {
1703 'youtube_include_dash_manifest': True,
1704 'format': '135', # bestvideo
be49068d
S
1705 },
1706 'skip': 'This live event has ended.',
2ee8f5d8 1707 },
cf7e015f 1708 {
6368e2e6 1709 # Multifeed videos (multiple cameras); the URL can be that of any camera
7666b936 1710 # TODO: fix multifeed titles
6368e2e6 1711 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1712 'info_dict': {
6368e2e6 1713 'id': 'zaPI8MvL8pg',
1714 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1715 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1716 },
1717 'playlist': [{
1718 'info_dict': {
6368e2e6 1719 'id': 'j5yGuxZ8lLU',
cf7e015f 1720 'ext': 'mp4',
6368e2e6 1721 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
6368e2e6 1722 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1723 'duration': 10120,
1724 'channel_follower_count': int,
1725 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1726 'availability': 'public',
1727 'playable_in_embed': True,
1728 'upload_date': '20131105',
6368e2e6 1729 'categories': ['Gaming'],
1730 'live_status': 'was_live',
1731 'tags': 'count:24',
1732 'release_timestamp': 1383701910,
1733 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1734 'comment_count': int,
1735 'age_limit': 0,
1736 'like_count': int,
1737 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1738 'channel': 'WiiLikeToPlay',
1739 'view_count': int,
1740 'release_date': '20131106',
7666b936 1741 'uploader': 'WiiLikeToPlay',
1742 'uploader_id': '@WLTP',
1743 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1744 },
1745 }, {
1746 'info_dict': {
6368e2e6 1747 'id': 'zaPI8MvL8pg',
cf7e015f 1748 'ext': 'mp4',
6368e2e6 1749 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
6368e2e6 1750 'availability': 'public',
1751 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1752 'channel': 'WiiLikeToPlay',
6368e2e6 1753 'channel_follower_count': int,
1754 'description': 'md5:563ccbc698b39298481ca3c571169519',
1755 'duration': 10108,
1756 'age_limit': 0,
1757 'like_count': int,
1758 'tags': 'count:24',
1759 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
6368e2e6 1760 'release_timestamp': 1383701915,
1761 'comment_count': int,
1762 'upload_date': '20131105',
1763 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1764 'release_date': '20131106',
1765 'playable_in_embed': True,
1766 'live_status': 'was_live',
1767 'categories': ['Gaming'],
1768 'view_count': int,
7666b936 1769 'uploader': 'WiiLikeToPlay',
1770 'uploader_id': '@WLTP',
1771 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1772 },
1773 }, {
1774 'info_dict': {
6368e2e6 1775 'id': 'R7r3vfO7Hao',
cf7e015f 1776 'ext': 'mp4',
6368e2e6 1777 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1778 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1779 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1780 'like_count': int,
1781 'availability': 'public',
1782 'playable_in_embed': True,
1783 'upload_date': '20131105',
1784 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1785 'channel_follower_count': int,
1786 'tags': 'count:24',
1787 'release_date': '20131106',
6368e2e6 1788 'comment_count': int,
1789 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1790 'channel': 'WiiLikeToPlay',
1791 'categories': ['Gaming'],
1792 'release_timestamp': 1383701914,
1793 'live_status': 'was_live',
1794 'age_limit': 0,
1795 'duration': 10128,
1796 'view_count': int,
7666b936 1797 'uploader': 'WiiLikeToPlay',
1798 'uploader_id': '@WLTP',
1799 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1800 },
1801 }],
6368e2e6 1802 'params': {'skip_download': True},
96a134de 1803 'skip': 'Not multifeed anymore',
cbaed4bb 1804 },
f9f49d87 1805 {
067aa17e 1806 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1807 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1808 'info_dict': {
1809 'id': 'gVfLd0zydlo',
1810 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1811 },
1812 'playlist_count': 2,
be49068d 1813 'skip': 'Not multifeed anymore',
f9f49d87 1814 },
cbaed4bb 1815 {
2d3d2997 1816 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1817 'only_matching': True,
0e49d9a6 1818 },
6d4fc66b 1819 {
2d3d2997 1820 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1821 'only_matching': True,
1822 },
0e49d9a6 1823 {
067aa17e 1824 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1825 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1826 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1827 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1828 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1829 'info_dict': {
1830 'id': 'lsguqyKfVQg',
1831 'ext': 'mp4',
1832 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1833 'alt_title': 'Dark Walk',
0e49d9a6 1834 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1835 'duration': 133,
0e49d9a6 1836 'upload_date': '20151119',
11f9be09 1837 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1838 'track': 'Dark Walk',
1839 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1840 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1841 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1842 'categories': ['Film & Animation'],
1843 'view_count': int,
1844 'live_status': 'not_live',
1845 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1846 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1847 'tags': 'count:13',
1848 'availability': 'public',
1849 'channel': 'IronSoulElf',
1850 'playable_in_embed': True,
1851 'like_count': int,
1852 'age_limit': 0,
add96eb9 1853 'channel_follower_count': int,
0e49d9a6
LL
1854 },
1855 'params': {
1856 'skip_download': True,
1857 },
1858 },
61f92af1 1859 {
067aa17e 1860 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1861 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1862 'only_matching': True,
1863 },
313dfc45
LL
1864 {
1865 # Video with yt:stretch=17:0
1866 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1867 'info_dict': {
1868 'id': 'Q39EVAstoRM',
1869 'ext': 'mp4',
1870 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1871 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1872 'upload_date': '20151107',
313dfc45
LL
1873 },
1874 'params': {
1875 'skip_download': True,
1876 },
be49068d 1877 'skip': 'This video does not exist.',
313dfc45 1878 },
201c1459 1879 {
1880 # Video with incomplete 'yt:stretch=16:'
1881 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1882 'only_matching': True,
1883 },
7caf9830
S
1884 {
1885 # Video licensed under Creative Commons
1886 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1887 'info_dict': {
1888 'id': 'M4gD1WSo5mA',
1889 'ext': 'mp4',
1890 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1891 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1892 'duration': 721,
17322130 1893 'upload_date': '20150128',
7caf9830 1894 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1895 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1896 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1897 'like_count': int,
1898 'age_limit': 0,
1899 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1900 'channel': 'The Berkman Klein Center for Internet & Society',
1901 'availability': 'public',
1902 'view_count': int,
1903 'categories': ['Education'],
1904 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1905 'live_status': 'not_live',
1906 'playable_in_embed': True,
d5d1df8a 1907 'channel_follower_count': int,
1908 'chapters': list,
7666b936 1909 'uploader': 'The Berkman Klein Center for Internet & Society',
1910 'uploader_id': '@BKCHarvard',
1911 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
96a134de 1912 'timestamp': 1422422076,
7caf9830
S
1913 },
1914 'params': {
1915 'skip_download': True,
1916 },
1917 },
fd050249 1918 {
fd050249
S
1919 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1920 'info_dict': {
1921 'id': 'eQcmzGIKrzg',
1922 'ext': 'mp4',
1923 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1924 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1925 'duration': 4060,
17322130 1926 'upload_date': '20151120',
fd050249 1927 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1928 'playable_in_embed': True,
1929 'tags': 'count:12',
1930 'like_count': int,
1931 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1932 'age_limit': 0,
1933 'availability': 'public',
1934 'categories': ['News & Politics'],
1935 'channel': 'Bernie Sanders',
1936 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1937 'view_count': int,
1938 'live_status': 'not_live',
1939 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1940 'comment_count': int,
d5d1df8a 1941 'channel_follower_count': int,
1942 'chapters': list,
7666b936 1943 'uploader': 'Bernie Sanders',
1944 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1945 'uploader_id': '@BernieSanders',
8213ce28 1946 'channel_is_verified': True,
14a14335 1947 'heatmap': 'count:100',
96a134de 1948 'timestamp': 1447987198,
fd050249
S
1949 },
1950 'params': {
1951 'skip_download': True,
1952 },
1953 },
040ac686
S
1954 {
1955 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1956 'only_matching': True,
7f29cf54
S
1957 },
1958 {
067aa17e 1959 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1960 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1961 'only_matching': True,
6496ccb4
S
1962 },
1963 {
1964 # Rental video preview
1965 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1966 'info_dict': {
1967 'id': 'uGpuVWrhIzE',
1968 'ext': 'mp4',
1969 'title': 'Piku - Trailer',
1970 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1971 'upload_date': '20150811',
6496ccb4
S
1972 'license': 'Standard YouTube License',
1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
eb6793ba 1977 'skip': 'This video is not available.',
022a5d66 1978 },
12afdc2a
S
1979 {
1980 # YouTube Red video with episode data
1981 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1982 'info_dict': {
1983 'id': 'iqKdEhx-dD4',
1984 'ext': 'mp4',
1985 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1986 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1987 'duration': 2085,
12afdc2a 1988 'upload_date': '20170118',
12afdc2a
S
1989 'series': 'Mind Field',
1990 'season_number': 1,
1991 'episode_number': 1,
976ae3ea 1992 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1993 'tags': 'count:12',
1994 'view_count': int,
1995 'availability': 'public',
1996 'age_limit': 0,
1997 'channel': 'Vsauce',
1998 'episode': 'Episode 1',
1999 'categories': ['Entertainment'],
2000 'season': 'Season 1',
2001 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2002 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2003 'like_count': int,
2004 'playable_in_embed': True,
2005 'live_status': 'not_live',
7666b936 2006 'channel_follower_count': int,
2007 'uploader': 'Vsauce',
2008 'uploader_url': 'https://www.youtube.com/@Vsauce',
2009 'uploader_id': '@Vsauce',
14a14335 2010 'comment_count': int,
8213ce28 2011 'channel_is_verified': True,
96a134de 2012 'timestamp': 1484761047,
12afdc2a
S
2013 },
2014 'params': {
2015 'skip_download': True,
2016 },
2017 'expected_warnings': [
2018 'Skipping DASH manifest',
2019 ],
2020 },
c7121fa7
S
2021 {
2022 # The following content has been identified by the YouTube community
2023 # as inappropriate or offensive to some audiences.
2024 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2025 'info_dict': {
2026 'id': '6SJNVb0GnPI',
2027 'ext': 'mp4',
2028 'title': 'Race Differences in Intelligence',
2029 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2030 'duration': 965,
2031 'upload_date': '20140124',
c7121fa7
S
2032 },
2033 'params': {
2034 'skip_download': True,
2035 },
545cc85d 2036 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 2037 },
022a5d66
S
2038 {
2039 # itag 212
2040 'url': '1t24XAntNCY',
2041 'only_matching': True,
fd5c4aab
S
2042 },
2043 {
2044 # geo restricted to JP
2045 'url': 'sJL6WA-aGkQ',
2046 'only_matching': True,
2047 },
cd5a74a2
S
2048 {
2049 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2050 'only_matching': True,
2051 },
bc2ca1bb 2052 {
2053 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2054 'only_matching': True,
2055 },
2056 {
2057 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2058 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2059 'only_matching': True,
2060 },
825cd268
RA
2061 {
2062 # DRM protected
2063 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2064 'only_matching': True,
4fe54c12
S
2065 },
2066 {
2067 # Video with unsupported adaptive stream type formats
2068 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2069 'info_dict': {
2070 'id': 'Z4Vy8R84T1U',
2071 'ext': 'mp4',
2072 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2073 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2074 'duration': 433,
2075 'upload_date': '20130923',
4fe54c12
S
2076 'formats': 'maxcount:10',
2077 },
2078 'params': {
2079 'skip_download': True,
2080 'youtube_include_dash_manifest': False,
2081 },
5429d6a9 2082 'skip': 'not actual anymore',
5caabd3c 2083 },
2084 {
822b9d9c 2085 # YouTube Music auto-generated description
7666b936 2086 # TODO: fix metadata extraction
5caabd3c 2087 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2088 'info_dict': {
2089 'id': 'MgNrAu2pzNs',
2090 'ext': 'mp4',
2091 'title': 'Voyeur Girl',
2092 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2093 'upload_date': '20190312',
104a7b5a
L
2094 'artists': ['Stephen'],
2095 'creators': ['Stephen'],
5caabd3c 2096 'track': 'Voyeur Girl',
2097 'album': 'it\'s too much love to know my dear',
2098 'release_date': '20190313',
976ae3ea 2099 'alt_title': 'Voyeur Girl',
2100 'view_count': int,
976ae3ea 2101 'playable_in_embed': True,
2102 'like_count': int,
2103 'categories': ['Music'],
2104 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
7666b936 2105 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2106 'uploader': 'Stephen',
976ae3ea 2107 'availability': 'public',
976ae3ea 2108 'duration': 169,
2109 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2110 'age_limit': 0,
2111 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2112 'tags': 'count:11',
2113 'live_status': 'not_live',
add96eb9 2114 'channel_follower_count': int,
5caabd3c 2115 },
2116 'params': {
2117 'skip_download': True,
2118 },
2119 },
66b48727
RA
2120 {
2121 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2122 'only_matching': True,
2123 },
011e75e6
S
2124 {
2125 # invalid -> valid video id redirection
2126 'url': 'DJztXj2GPfl',
2127 'info_dict': {
2128 'id': 'DJztXj2GPfk',
2129 'ext': 'mp4',
2130 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2131 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2132 'upload_date': '20090125',
011e75e6
S
2133 'artist': 'Panjabi MC',
2134 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2135 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2136 },
2137 'params': {
2138 'skip_download': True,
2139 },
545cc85d 2140 'skip': 'Video unavailable',
ea74e00b
DP
2141 },
2142 {
2143 # empty description results in an empty string
2144 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2145 'info_dict': {
2146 'id': 'x41yOUIvK2k',
2147 'ext': 'mp4',
2148 'title': 'IMG 3456',
2149 'description': '',
2150 'upload_date': '20170613',
976ae3ea 2151 'view_count': int,
2152 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
976ae3ea 2153 'like_count': int,
2154 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2155 'tags': [],
2156 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2157 'availability': 'public',
2158 'age_limit': 0,
2159 'categories': ['Pets & Animals'],
2160 'duration': 7,
2161 'playable_in_embed': True,
2162 'live_status': 'not_live',
7666b936 2163 'channel': 'l\'Or Vert asbl',
2164 'channel_follower_count': int,
2165 'uploader': 'l\'Or Vert asbl',
2166 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2167 'uploader_id': '@ElevageOrVert',
96a134de 2168 'timestamp': 1497343210,
ea74e00b
DP
2169 },
2170 'params': {
2171 'skip_download': True,
2172 },
2173 },
a0566bbf 2174 {
29f7c58a 2175 # with '};' inside yt initial data (see [1])
2176 # see [2] for an example with '};' inside ytInitialPlayerResponse
2177 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2178 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2179 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2180 'info_dict': {
2181 'id': 'CHqg6qOn4no',
2182 'ext': 'mp4',
2183 'title': 'Part 77 Sort a list of simple types in c#',
2184 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2185 'upload_date': '20130831',
976ae3ea 2186 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2187 'like_count': int,
976ae3ea 2188 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2189 'live_status': 'not_live',
2190 'categories': ['Education'],
2191 'availability': 'public',
2192 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2193 'tags': 'count:12',
2194 'playable_in_embed': True,
2195 'age_limit': 0,
2196 'view_count': int,
2197 'duration': 522,
2198 'channel': 'kudvenkat',
12a1b225 2199 'comment_count': int,
d5d1df8a 2200 'channel_follower_count': int,
2201 'chapters': list,
7666b936 2202 'uploader': 'kudvenkat',
2203 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2204 'uploader_id': '@Csharp-video-tutorialsBlogspot',
8213ce28 2205 'channel_is_verified': True,
14a14335 2206 'heatmap': 'count:100',
96a134de 2207 'timestamp': 1377976349,
a0566bbf 2208 },
2209 'params': {
2210 'skip_download': True,
2211 },
2212 },
29f7c58a 2213 {
2214 # another example of '};' in ytInitialData
2215 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2216 'only_matching': True,
2217 },
2218 {
2219 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2220 'only_matching': True,
2221 },
545cc85d 2222 {
cc2db878 2223 # https://github.com/ytdl-org/youtube-dl/pull/28094
2224 'url': 'OtqTfy26tG0',
2225 'info_dict': {
2226 'id': 'OtqTfy26tG0',
2227 'ext': 'mp4',
2228 'title': 'Burn Out',
2229 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2230 'upload_date': '20141120',
cc2db878 2231 'artist': 'The Cinematic Orchestra',
2232 'track': 'Burn Out',
2233 'album': 'Every Day',
976ae3ea 2234 'like_count': int,
2235 'live_status': 'not_live',
2236 'alt_title': 'Burn Out',
2237 'duration': 614,
2238 'age_limit': 0,
2239 'view_count': int,
2240 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2241 'creator': 'The Cinematic Orchestra',
2242 'channel': 'The Cinematic Orchestra',
2243 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2244 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2245 'availability': 'public',
2246 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2247 'categories': ['Music'],
2248 'playable_in_embed': True,
7666b936 2249 'channel_follower_count': int,
2250 'uploader': 'The Cinematic Orchestra',
2251 'comment_count': int,
cc2db878 2252 },
2253 'params': {
2254 'skip_download': True,
2255 },
545cc85d 2256 },
bc2ca1bb 2257 {
2258 # controversial video, only works with bpctr when authenticated with cookies
2259 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2260 'only_matching': True,
2261 },
a1a7907b 2262 {
2263 # controversial video, requires bpctr/contentCheckOk
2264 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2265 'info_dict': {
2266 'id': 'SZJvDhaSDnc',
2267 'ext': 'mp4',
2268 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2269 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
a1a7907b 2270 'upload_date': '20140716',
976ae3ea 2271 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2272 'duration': 170,
2273 'categories': ['News & Politics'],
976ae3ea 2274 'view_count': int,
2275 'channel': 'CBS Mornings',
2276 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2277 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2278 'age_limit': 18,
2279 'availability': 'needs_auth',
2280 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2281 'like_count': int,
2282 'live_status': 'not_live',
2283 'playable_in_embed': True,
7666b936 2284 'channel_follower_count': int,
2285 'uploader': 'CBS Mornings',
2286 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2287 'uploader_id': '@CBSMornings',
14a14335 2288 'comment_count': int,
8213ce28 2289 'channel_is_verified': True,
96a134de 2290 'timestamp': 1405513526,
add96eb9 2291 },
a1a7907b 2292 },
f7ad7160 2293 {
2294 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2295 'url': 'cBvYw8_A0vQ',
2296 'info_dict': {
2297 'id': 'cBvYw8_A0vQ',
2298 'ext': 'mp4',
2299 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2300 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2301 'upload_date': '20201120',
976ae3ea 2302 'duration': 1456,
2303 'categories': ['Travel & Events'],
2304 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2305 'view_count': int,
2306 'channel': 'Walk around Japan',
2307 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
96a134de 2308 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
976ae3ea 2309 'age_limit': 0,
2310 'availability': 'public',
2311 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2312 'live_status': 'not_live',
2313 'playable_in_embed': True,
7666b936 2314 'channel_follower_count': int,
2315 'uploader': 'Walk around Japan',
2316 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2317 'uploader_id': '@walkaroundjapan7124',
96a134de 2318 'timestamp': 1605884416,
f7ad7160 2319 },
2320 'params': {
2321 'skip_download': True,
2322 },
0fb983f6 2323 }, {
2324 # Has multiple audio streams
2325 'url': 'WaOKSUlf4TM',
add96eb9 2326 'only_matching': True,
9297939e 2327 }, {
2328 # Requires Premium: has format 141 when requested using YTM url
2329 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
add96eb9 2330 'only_matching': True,
9297939e 2331 }, {
120916da 2332 # multiple subtitles with same lang_code
2333 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2334 'only_matching': True,
109dd3b2 2335 }, {
2336 # Force use android client fallback
2337 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2338 'info_dict': {
2339 'id': 'YOelRv7fMxY',
11f9be09 2340 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2341 'ext': '3gp',
2342 'upload_date': '20210624',
2343 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
109dd3b2 2344 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2345 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2346 'duration': 596,
2347 'categories': ['Entertainment'],
976ae3ea 2348 'view_count': int,
2349 'channel': 'colinfurze',
2350 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2351 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2352 'age_limit': 0,
2353 'availability': 'public',
2354 'like_count': int,
2355 'live_status': 'not_live',
2356 'playable_in_embed': True,
d5d1df8a 2357 'channel_follower_count': int,
2358 'chapters': list,
7666b936 2359 'uploader': 'colinfurze',
2360 'uploader_url': 'https://www.youtube.com/@colinfurze',
2361 'uploader_id': '@colinfurze',
14a14335 2362 'comment_count': int,
8213ce28 2363 'channel_is_verified': True,
14a14335 2364 'heatmap': 'count:100',
109dd3b2 2365 },
2366 'params': {
2367 'format': '17', # 3gp format available on android
2368 'extractor_args': {'youtube': {'player_client': ['android']}},
2369 },
12d8ea82 2370 'skip': 'android client broken',
120916da 2371 },
109dd3b2 2372 {
2373 # Skip download of additional client configs (remix client config in this case)
2374 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2375 'only_matching': True,
2376 'params': {
2377 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2378 },
8fc54b12 2379 }, {
2380 # shorts
2381 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2382 'only_matching': True,
9222c381 2383 }, {
2384 'note': 'Storyboards',
2385 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2386 'info_dict': {
2387 'id': '5KLPxDtMqe8',
2388 'ext': 'mhtml',
2389 'format_id': 'sb0',
2390 'title': 'Your Brain is Plastic',
9222c381 2391 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2392 'upload_date': '20140324',
976ae3ea 2393 'like_count': int,
2394 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2395 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2396 'view_count': int,
2397 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2398 'playable_in_embed': True,
2399 'tags': 'count:12',
976ae3ea 2400 'availability': 'public',
2401 'channel': 'SciShow',
2402 'live_status': 'not_live',
2403 'duration': 248,
2404 'categories': ['Education'],
2405 'age_limit': 0,
d5d1df8a 2406 'channel_follower_count': int,
2407 'chapters': list,
7666b936 2408 'uploader': 'SciShow',
2409 'uploader_url': 'https://www.youtube.com/@SciShow',
2410 'uploader_id': '@SciShow',
14a14335 2411 'comment_count': int,
8213ce28 2412 'channel_is_verified': True,
14a14335 2413 'heatmap': 'count:100',
96a134de 2414 'timestamp': 1395685455,
add96eb9 2415 }, 'params': {'format': 'mhtml', 'skip_download': True},
992f9a73 2416 }, {
2417 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2418 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2419 'info_dict': {
2420 'id': '2NUZ8W2llS4',
2421 'ext': 'mp4',
2422 'title': 'The NP that test your phone performance 🙂',
2423 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
992f9a73 2424 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2425 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2426 'duration': 21,
2427 'view_count': int,
2428 'age_limit': 0,
2429 'categories': ['Gaming'],
2430 'tags': 'count:23',
2431 'playable_in_embed': True,
2432 'live_status': 'not_live',
2433 'upload_date': '20220103',
2434 'like_count': int,
2435 'availability': 'public',
2436 'channel': 'Leon Nguyen',
2437 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2438 'comment_count': int,
7666b936 2439 'channel_follower_count': int,
2440 'uploader': 'Leon Nguyen',
2441 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2442 'uploader_id': '@LeonNguyen',
14a14335 2443 'heatmap': 'count:100',
96a134de 2444 'timestamp': 1641170939,
add96eb9 2445 },
992f9a73 2446 }, {
2447 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2448 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2449 'info_dict': {
2450 'id': 'mzZzzBU6lrM',
2451 'ext': 'mp4',
2452 'title': 'I Met GeorgeNotFound In Real Life...',
7666b936 2453 'description': 'md5:978296ec9783a031738b684d4ebf302d',
992f9a73 2454 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2455 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2456 'duration': 955,
2457 'view_count': int,
2458 'age_limit': 0,
2459 'categories': ['Entertainment'],
2460 'tags': 'count:26',
2461 'playable_in_embed': True,
2462 'live_status': 'not_live',
2463 'release_timestamp': 1641172509,
2464 'release_date': '20220103',
2465 'upload_date': '20220103',
2466 'like_count': int,
2467 'availability': 'public',
2468 'channel': 'Quackity',
2469 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
7666b936 2470 'channel_follower_count': int,
2471 'uploader': 'Quackity',
2472 'uploader_id': '@Quackity',
2473 'uploader_url': 'https://www.youtube.com/@Quackity',
14a14335 2474 'comment_count': int,
8213ce28 2475 'channel_is_verified': True,
14a14335 2476 'heatmap': 'count:100',
96a134de 2477 'timestamp': 1641172509,
add96eb9 2478 },
992f9a73 2479 },
96a134de 2480 { # continuous livestream.
2481 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2482 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
992f9a73 2483 'info_dict': {
96a134de 2484 'id': 'jfKfPfyJRdk',
992f9a73 2485 'ext': 'mp4',
96a134de 2486 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2487 'like_count': int,
2488 'uploader': 'Lofi Girl',
2489 'categories': ['Music'],
2490 'concurrent_view_count': int,
2491 'playable_in_embed': True,
2492 'timestamp': 1657627949,
2493 'release_date': '20220712',
2494 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2495 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
992f9a73 2496 'age_limit': 0,
96a134de 2497 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2498 'release_timestamp': 1657641570,
2499 'uploader_url': 'https://www.youtube.com/@LofiGirl',
992f9a73 2500 'channel_follower_count': int,
96a134de 2501 'channel_is_verified': True,
2502 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
992f9a73 2503 'view_count': int,
96a134de 2504 'live_status': 'is_live',
2505 'tags': 'count:32',
2506 'channel': 'Lofi Girl',
2507 'availability': 'public',
2508 'upload_date': '20220712',
2509 'uploader_id': '@LofiGirl',
992f9a73 2510 },
96a134de 2511 'params': {'skip_download': True},
ee27297f 2512 }, {
2513 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2514 'info_dict': {
2515 'id': 'tjjjtzRLHvA',
2516 'ext': 'mp4',
2517 'title': 'ハッシュタグ無し };if window.ytcsi',
2518 'upload_date': '20220323',
2519 'like_count': int,
2520 'availability': 'unlisted',
7666b936 2521 'channel': 'Lesmiscore',
2522 'thumbnail': r're:^https?://.*\.jpg',
ee27297f 2523 'age_limit': 0,
ee27297f 2524 'categories': ['Music'],
6e634cbe 2525 'view_count': int,
2526 'description': '',
ee27297f 2527 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2528 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2529 'live_status': 'not_live',
2530 'playable_in_embed': True,
2531 'channel_follower_count': int,
2532 'duration': 6,
2533 'tags': [],
7666b936 2534 'uploader_id': '@lesmiscore',
2535 'uploader': 'Lesmiscore',
2536 'uploader_url': 'https://www.youtube.com/@lesmiscore',
96a134de 2537 'timestamp': 1648005313,
add96eb9 2538 },
c26f9b99 2539 }, {
2540 # Prefer primary title+description language metadata by default
2541 # Do not prefer translated description if primary is empty
2542 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2543 'info_dict': {
2544 'id': 'el3E4MbxRqQ',
2545 'ext': 'mp4',
2546 'title': 'dlp test video 2 - primary sv no desc',
2547 'description': '',
2548 'channel': 'cole-dlp-test-acc',
2549 'tags': [],
2550 'view_count': int,
2551 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2552 'like_count': int,
2553 'playable_in_embed': True,
2554 'availability': 'unlisted',
7666b936 2555 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2556 'age_limit': 0,
2557 'duration': 5,
c26f9b99 2558 'live_status': 'not_live',
2559 'upload_date': '20220908',
2560 'categories': ['People & Blogs'],
c26f9b99 2561 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
7666b936 2562 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2563 'uploader_id': '@coletdjnz',
2564 'uploader': 'cole-dlp-test-acc',
96a134de 2565 'timestamp': 1662677394,
c26f9b99 2566 },
add96eb9 2567 'params': {'skip_download': True},
c26f9b99 2568 }, {
2569 # Extractor argument: prefer translated title+description
2570 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2571 'info_dict': {
2572 'id': 'gHKT4uU8Zng',
2573 'ext': 'mp4',
2574 'channel': 'cole-dlp-test-acc',
2575 'tags': [],
2576 'duration': 5,
2577 'live_status': 'not_live',
2578 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
96a134de 2579 'upload_date': '20220729',
c26f9b99 2580 'view_count': int,
2581 'categories': ['People & Blogs'],
7666b936 2582 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2583 'title': 'dlp test video title translated (fr)',
2584 'availability': 'public',
c26f9b99 2585 'age_limit': 0,
2586 'description': 'dlp test video description translated (fr)',
2587 'playable_in_embed': True,
2588 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
7666b936 2589 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2590 'uploader_id': '@coletdjnz',
2591 'uploader': 'cole-dlp-test-acc',
96a134de 2592 'timestamp': 1659073275,
2593 'like_count': int,
c26f9b99 2594 },
2595 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2596 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2597 }, {
2598 'note': '6 channel audio',
2599 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2600 'only_matching': True,
a4894d3e 2601 }, {
2602 'note': 'Multiple HLS formats with same itag',
2603 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2604 'info_dict': {
2605 'id': 'kX3nB4PpJko',
2606 'ext': 'mp4',
2607 'categories': ['Entertainment'],
2608 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
a4894d3e 2609 'live_status': 'not_live',
2610 'duration': 937,
2611 'channel_follower_count': int,
2612 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2613 'title': 'Last To Take Hand Off Jet, Keeps It!',
2614 'channel': 'MrBeast',
2615 'playable_in_embed': True,
2616 'view_count': int,
2617 'upload_date': '20221112',
a4894d3e 2618 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2619 'age_limit': 0,
2620 'availability': 'public',
2621 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2622 'like_count': int,
2623 'tags': [],
7666b936 2624 'uploader': 'MrBeast',
2625 'uploader_url': 'https://www.youtube.com/@MrBeast',
2626 'uploader_id': '@MrBeast',
14a14335 2627 'comment_count': int,
8213ce28 2628 'channel_is_verified': True,
14a14335 2629 'heatmap': 'count:100',
a4894d3e 2630 },
2631 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2632 }, {
2633 'note': 'Audio formats with Dynamic Range Compression',
2634 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2635 'info_dict': {
2636 'id': 'Tq92D6wQ1mg',
7666b936 2637 'ext': 'webm',
9bb85699 2638 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2639 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2640 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2641 'channel_follower_count': int,
2642 'description': 'md5:17eccca93a786d51bc67646756894066',
2643 'upload_date': '20191228',
9bb85699 2644 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2645 'playable_in_embed': True,
2646 'like_count': int,
2647 'categories': ['Entertainment'],
2648 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2649 'age_limit': 18,
2650 'channel': 'Projekt Melody',
9bb85699 2651 'view_count': int,
2652 'availability': 'needs_auth',
2653 'comment_count': int,
2654 'live_status': 'not_live',
9bb85699 2655 'duration': 106,
7666b936 2656 'uploader': 'Projekt Melody',
2657 'uploader_id': '@ProjektMelody',
2658 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
96a134de 2659 'timestamp': 1577508724,
9bb85699 2660 },
2661 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2662 },
2663 {
2664 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2665 'info_dict': {
2666 'id': 'qVv6vCqciTM',
2667 'ext': 'mp4',
2668 'age_limit': 0,
dad2210c 2669 'comment_count': int,
2670 'chapters': 'count:13',
2671 'upload_date': '20221223',
2672 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2673 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
dad2210c 2674 'like_count': int,
2675 'release_date': '20221223',
2676 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2677 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2678 'view_count': int,
2679 'playable_in_embed': True,
2680 'duration': 4438,
2681 'availability': 'public',
2682 'channel_follower_count': int,
2683 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2684 'categories': ['Entertainment'],
2685 'live_status': 'was_live',
2686 'release_timestamp': 1671793345,
2687 'channel': 'さなちゃんねる',
2688 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2689 'uploader': 'さなちゃんねる',
7666b936 2690 'uploader_url': 'https://www.youtube.com/@sana_natori',
2691 'uploader_id': '@sana_natori',
8213ce28 2692 'channel_is_verified': True,
14a14335 2693 'heatmap': 'count:100',
96a134de 2694 'timestamp': 1671798112,
7666b936 2695 },
2696 },
2697 {
2698 # Fallbacks when webpage and web client is unavailable
2699 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2700 'info_dict': {
2701 'id': 'wSSmNUl9Snw',
2702 'ext': 'mp4',
2703 # 'categories': ['Science & Technology'],
2704 'view_count': int,
2705 'chapters': 'count:2',
2706 'channel': 'Scott Manley',
2707 'like_count': int,
2708 'age_limit': 0,
2709 # 'availability': 'public',
2710 'channel_follower_count': int,
2711 'live_status': 'not_live',
2712 'upload_date': '20170831',
2713 'duration': 682,
2714 'tags': 'count:8',
2715 'uploader_url': 'https://www.youtube.com/@scottmanley',
2716 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2717 'uploader': 'Scott Manley',
2718 'uploader_id': '@scottmanley',
2719 'title': 'The Computer Hack That Saved Apollo 14',
2720 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2721 'thumbnail': r're:^https?://.*\.webp',
2722 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2723 'playable_in_embed': True,
14a14335 2724 'comment_count': int,
8213ce28 2725 'channel_is_verified': True,
14a14335 2726 'heatmap': 'count:100',
7666b936 2727 },
2728 'params': {
12d8ea82 2729 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
dad2210c 2730 },
2731 },
2eb88d95
PH
2732 ]
2733
f2e8dbcc 2734 _WEBPAGE_TESTS = [
2735 # YouTube <object> embed
2736 {
2737 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2738 'md5': '873c81d308b979f0e23ee7e620b312a3',
2739 'info_dict': {
2740 'id': 'msN87y-iEx0',
2741 'ext': 'mp4',
2742 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2743 'upload_date': '20080526',
2744 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
f2e8dbcc 2745 'age_limit': 0,
2746 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2747 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2748 'playable_in_embed': True,
2749 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2750 'like_count': int,
2751 'comment_count': int,
2752 'channel': 'Christopher Sykes',
2753 'live_status': 'not_live',
2754 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2755 'availability': 'public',
2756 'duration': 195,
2757 'view_count': int,
2758 'categories': ['Science & Technology'],
2759 'channel_follower_count': int,
7666b936 2760 'uploader': 'Christopher Sykes',
2761 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2762 'uploader_id': '@ChristopherSykesDocumentaries',
14a14335 2763 'heatmap': 'count:100',
96a134de 2764 'timestamp': 1211825920,
f2e8dbcc 2765 },
2766 'params': {
2767 'skip_download': True,
add96eb9 2768 },
f2e8dbcc 2769 },
2770 ]
2771
@classmethod
def suitable(cls, url):
    """Reject URLs with a non-empty `list` query parameter; otherwise defer to the parent class"""
    # NOTE(review): imported locally although the module header imports it as well;
    # presumably this method has to stay self-contained - confirm before removing
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
201c1459 2780
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent constructor and create the empty per-instance caches"""
    super().__init__(*args, **kwargs)
    # _code_cache maps a player id to its downloaded JS (see _load_player)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2785
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach a fragment source to every format flagged with 'is_from_start'.

    The matching format dicts are modified in place: 'is_live' and 'fragments'
    are set ('protocol' as well while the stream is live) and the
    'is_from_start' flag is removed. Nothing is returned.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = list(filter(lambda fmt: fmt.get('is_from_start'), formats))

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once `delay` seconds have passed since the last refresh"""
        nonlocal formats, start_time, is_live
        if start_time + delay >= time.time():
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only one caller at a time may refresh the shared format list
            with manifest_lock:
                refetch_manifest(format_id, delay)

            matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if matched:
                return matched['manifest_url'], matched['manifest_stream_number'], is_live
            retry.error = (
                f'{video_id}: Video is no longer live' if not is_live
                else f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # Once the stream has ended, a snapshot of the format is handed over instead of False
        fragment_gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, False if is_live else fmt.copy())
        if is_live:
            fmt.update({
                'fragments': fragment_gen,
                'protocol': 'http_dash_segments_generator',
            })
        else:
            fmt['fragments'] = LazyList(fragment_gen({}))
        fmt.pop('is_from_start')
adbc4ec4 2833
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts for a live DASH format, polling for newly published segments.

    @param video_id               Only used in the debug message
    @param format_id              Handed to mpd_feed to pick the manifest
    @param live_start_time        Stream start, compared against time.time(); may be falsy
    @param mpd_feed               Callable (format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  Falsy, or a format dict whose 'fragments' and
                                  'fragment_base_url' are used instead of downloading the MPD
    @param ctx                    Dict; 'start' is read and 'last_error' is popped from it
    @yields                       dicts with the keys 'url' and 'fragment_count'
    """
    # Seconds between two polls; 432000 seconds = 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the helper below can run: the helper returns it on
    # its failure paths, which may be hit on the very first call
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest information through mpd_feed.

        @returns (should_continue, last_seq); last_seq is the unchanged outer
                 value unless the sequence number was re-read from the manifest
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            # A bare next() would leak StopIteration into the generator (RuntimeError);
            # a manifest without our stream is treated like a failed refresh instead
            fmt_info = next((x for x in fmts if x['manifest_stream_number'] == stream_number), None)
            if fmt_info is None:
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up after too many consecutive polls without any new fragment
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught below; only used to jump to the next poll
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2944
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Look up the player JS URL in the given ytcfg dicts.

    @returns the URL resolved against https://www.youtube.com, or None if no ytcfg provides one
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
109dd3b2 2952
def _download_player_url(self, video_id, fatal=False):
    """
    Build the player JS URL from the player version found in the iframe API script.

    @returns the base.js URL, or None if the download or the version lookup yields nothing
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    # The slashes around the version may be backslash-escaped in the script
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2962
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Describe a signature by the lengths of its dot-separated parts, e.g. 'abc.de' -> '3.2' """
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(str, part_lengths))
60064c53 2966
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in _PLAYER_INFO_RE that matches the player URL.

    @raises ExtractorError if none of the patterns matches
    """
    # Lazy, so no further pattern is tried once one has matched
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return id_m.group('id')
e40c758c 2976
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the JS code of the player, downloading it unless it is already in _code_cache.

    @returns the player code, or None if the download yields nothing
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    player_code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote=f'Download of {player_url} failed')
    if not player_code:
        # Failed downloads are not cached, so the next call tries again
        return None
    self._code_cache[player_id] = player_code
    return player_code
109dd3b2 2987
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Build a function that rearranges a signature the way the player JS does.

    The rearrangement is kept as a list of source indices, which is loaded from
    the 'youtube-sigfuncs' cache section when present and stored there otherwise.

    @returns a callable taking a signature string and returning the rearranged string
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_shape = self._signature_cache_id(example_sig)
    func_id = f'js_{player_id}_{sig_shape}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            sig_func = self._parse_sig_js(player_code)
            # Feed characters whose code points equal their own positions, so the
            # output reveals which input position lands at each output position
            probe = ''.join(chr(pos) for pos in range(len(example_sig)))
            cache_spec = list(map(ord, sig_func(probe)))
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
83799698 3007
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.

    @param func         Callable that maps a string to its rearranged form
    @param example_sig  Signature whose length and dot-separated part lengths
                        describe the inputs the printed code applies to
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield index/slice expressions on `s` that together reproduce the index list"""
        if not idxs:
            return

        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = f':{end + step}' if end + step >= 0 else ':'
            steps = '' if step == 1 else f':{step}'
            return f's[{starts}{ends}{steps}]'

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # With fewer than two indices the loop below never runs, so bind the
        # "last index" up front; it is read after the loop
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield f's[{prev}]'
        if step is None:
            yield f's[{i}]'
        else:
            yield _genslice(start, i, step)

    # Characters whose code points equal their positions make the permutation readable
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
    code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
            f' return {expr_code}\n')
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 3048
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    The function name is searched for with a list of patterns (current
    ones first, then obsolete ones); the function is then extracted with
    JSInterpreter.

    @param jscode  source code of the JS player
    @returns       callable taking a signature string and returning the
                   result of the extracted JS function
    """
    current_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
    )
    obsolete_patterns = (
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        current_patterns + obsolete_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
3069
580ce007 3070 def _cached(self, func, *cache_id):
3071 def inner(*args, **kwargs):
3072 if cache_id not in self._player_cache:
3073 try:
3074 self._player_cache[cache_id] = func(*args, **kwargs)
3075 except ExtractorError as e:
3076 self._player_cache[cache_id] = e
3077 except Exception as e:
3078 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3079
3080 ret = self._player_cache[cache_id]
3081 if isinstance(ret, Exception):
3082 raise ret
3083 return ret
3084 return inner
3085
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is obtained through _cached(), keyed on the
    player URL and the signature's cache id, and is also handed to
    _print_sig_code() before being applied to *s*.
    """
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
404f611f 3093
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into its decrypted value

    The n function code is extracted from the player and run natively
    first. If that raises JSInterpreter.Exception, the same code is
    executed through PhantomJSwrapper instead; if the wrapper cannot be
    created, the original interpreter error is raised.

    @param s           the encrypted n value
    @param video_id    used for messages and requests
    @param player_url  URL of the JS player; joined onto
                       https://www.youtube.com
    @raises ExtractorError  if player_url is None or the function code
                            cannot be extracted
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        cached_builder = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = cached_builder(jsi, func_code)
        result = nsig_func(s)
    except JSInterpreter.Exception as native_err:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the interpreter failure
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        func_args, func_body = func_code
        script = f'console.log(function({", ".join(func_args)}) {{ {func_body} }}({s!r}));'
        result = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {result}')
    return result
3127
def _extract_n_function_name(self, jscode):
    """Find the name of the n function in the player JS.

    The first match yields either the name directly or an array name
    plus an index; in the latter case the array literal is looked up and
    the indexed entry is returned.

    @param jscode  source code of the JS player
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    # funcname names an array here: read its literal and pick the entry
    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(idx)]
3138
def _extract_n_function_code(self, video_id, player_url):
    """Return the interpreter, player id and code of the n function.

    The code is loaded from the 'youtube-nsig' cache when available.
    Otherwise the player is downloaded, the function is located by name
    and its code is extracted, by regex first and by JSInterpreter as a
    fallback, then stored in the cache.

    @param video_id    passed through to the player download
    @param player_url  URL of the JS player
    @returns           (jsi, player_id, func_code), where func_code is an
                       (argument names, body) pair when freshly extracted
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name may contain "$", which is a regex metacharacter, so it has to
    # be escaped before being interpolated (as _extract_n_function_name does)
    func_code = self._search_regex(
        rf'''(?xs){re.escape(func_name)}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {{(?P<code>.+?}}\s*return\ [\w$]+\.join\(""\))}};''',
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to the (argument list, body) shape
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3164
def _extract_n_function_from_code(self, jsi, func_code):
    """Turn extracted n function code into a Python callable.

    @param jsi        interpreter used to build the function
    @param func_code  sequence unpacked into extract_function_from_code()
    @returns          callable taking the n value; every failure of the
                      underlying function is raised as
                      JSInterpreter.Exception
    """
    interpreted = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = interpreted([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalise any other failure so callers only handle one type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
e0df6211 3181
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when that is truthy, otherwise
    it is searched for in the player code. Without a player_url this
    raises ExtractorError if fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if not player_code:
        # Nothing to search in; hand back whatever ytcfg gave (None or 0)
        return sts

    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
3205
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URLs so the video counts as watched.

    The 'videostatsPlaybackUrl' and 'videostatsWatchtimeUrl' base URLs
    are taken from the player responses, their queries are extended and
    each URL is requested non-fatally. If a URL is missing, a warning is
    reported and the method returns without trying the remaining one.

    @param video_id          used for the request notes
    @param player_responses  player responses to read 'playbackTracking' from
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # 16 characters drawn uniformly from the 64-character alphabet
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        # doseq=True so the list values from parse_qs are expanded again
        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3246
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by *webpage*.

    An alternate link to a youtube.com watch URL is yielded on its own
    and then StopExtraction is raised. Otherwise the parent class's
    results are yielded, followed by lazyYT embeds and "YouTube Video
    Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazyyt_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    for *_, importer_id in importer_matches:
        yield cls.url_result(importer_id, cls, importer_id)
66c9fa36 3270
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for *url*, raising ExtractorError if there is none"""
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3277
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the player bar section of *data*.

    @param data      response data containing 'playerOverlays'
    @param duration  forwarded to _extract_chapters_helper
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3292
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the engagement panels of *data*.

    Each macro markers list is tried in order; the first one that yields
    a non-empty result is returned, otherwise an empty list.

    @param data      response data containing 'engagementPanels'
    @param duration  forwarded to _extract_chapters_helper
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    for contents in content_list:
        chapters = self._extract_chapters_helper(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3305
def _extract_heatmap(self, data):
    """Extract heatmap markers from the entity mutations of *data*.

    @returns  list of dicts with 'start_time', 'end_time' and 'value',
              or None when nothing was extracted
    """
    def is_heatmap(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    millis_to_seconds = functools.partial(float_or_none, scale=1000)

    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations', is_heatmap,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
            'start_time': ('startMillis', {millis_to_seconds}),
            'end_time': {end_seconds},
            'value': ('intensityScoreNormalized', {float_or_none}),
        }))
    return heatmap or None
5caf30db 3315
8e15177b
JK
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads (new comment format).

    @param entities  entity mutations belonging to one comment
    @param parent    id of the parent comment; 'root' is used when falsy
    @returns         comment info dict, or None if no comment id is found
    """
    comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return None

    toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''

    info = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    info.update(traverse_obj(comment_entity_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False))

    # Unknown (None) when there is no toolbar payload at all
    if toolbar_entity_payload is None:
        info['is_favorited'] = None
    else:
        info['is_favorited'] = toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)
    return info
3344
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a commentRenderer (old comment format).

    @param comment_renderer  the renderer dict of a single comment
    @param parent            id of the parent comment; 'root' is used
                             when falsy
    @returns                 comment info dict, or None if the renderer
                             has no comment id
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(comment_renderer, ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    # The remaining fields are only set when the renderer provides them
    author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if author_is_uploader is not None:
        info['author_is_uploader'] = author_is_uploader

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
a1c5d2ca 3393
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    Called without a parent for the top-level comment section, and
    recursively (with parent set to a comment id and the same tracker)
    for the replies of a comment.

    @param root_continuation_data  data the first continuation is
                                   extracted from
    @param ytcfg                   used for API headers and requests
    @param video_id                used to generate a continuation when
                                   none is found, and in warnings
    @param parent                  id of the parent comment, or None
    @param tracker                 dict of counters and seen ids shared
                                   across recursive calls; created when
                                   not given
    @raises self.CommentsDisabled  if a message renderer is present, this
                                   is the top level and no comment was
                                   counted
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation of
        # the requested sort order, or None if no header provides one
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comment dicts; a bare `yield` (None) tells the consumer
        # in the page loop below to stop extraction
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                # Pick the entity mutations whose key belongs to this comment
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is
                # left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Up to four limits are read from the max_comments argument; the list is
    # padded with '' so missing positions get the sys.maxsize default
    # (presumably int_or_none returns the default for '' - confirm)
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    # One iteration per API page; ends when no further continuation is found
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response is only searched for the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3596
3597 @staticmethod
3598 def _generate_comment_continuation(video_id):
3599 """
3600 Generates initial comment section continuation token from given video id
3601 """
3602 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3603 return base64.b64encode(token.encode()).decode()
3604
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the comment item
    section in *contents*, limited to the first value of the
    max_comments argument.
    """
    def _real_comment_extract(section_list):
        # Locate the item section that holds the comments (None if absent)
        renderer = None
        for item in traverse_obj(section_list, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)
a1c5d2ca 3615
109dd3b2 3616 @staticmethod
99e9e001 3617 def _get_checkok_params():
3618 return {'contentCheckOk': True, 'racyCheckOk': True}
3619
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player request.

    @param sts  signature timestamp; only included when not None
    @returns    dict with 'playbackContext' plus the check-ok params
    """
    content_playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        content_playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': content_playback,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3633
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether *player_response* indicates an age-gated video.

    True if 'desktopLegacyAgeGateReason' is set, or if the playability
    status or reason contains one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    # NOTE(review): this is a case-sensitive substring test; confirm the
    # status values really arrive in lower case
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3645
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3649
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for *video_id* using *client*.

    @param client         name of the client; used as default client and
                          in the request note
    @param master_ytcfg   ytcfg used as a second source for session
                          index, sync id and signature timestamp
    @param player_ytcfg   ytcfg of the client
    @param player_url     URL of the JS player; without it no signature
                          timestamp is sent
    @param initial_pr     initial player response, used for the sync id
    @param smuggled_data  not used by this method
    @returns              the response, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    # Optional 'params' value taken from the player_params argument
    pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
    yt_query = {
        'videoId': video_id,
        **({'params': pp_arg} if pp_arg else {}),
        **self._generate_player_context(sts),
    }

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON',
    )
    return response or None
3673
def _get_requested_clients(self, url, smuggled_data):
    """Work out which player clients to use, in order and without duplicates.

    Clients come from the player_client argument: 'default' expands to
    ios and web, 'all' to every client whose name does not start with
    '_' (sorted by priority, highest first); unsupported names are
    skipped with a warning. Android clients are moved to the end. For
    music URLs the '<client>_music' variant of each chosen client is
    appended when such a client exists.

    @param url            URL being extracted; checked with is_music_url
    @param smuggled_data  dict; 'is_music_url' forces the music variants
    """
    requested_clients = []
    android_clients = []
    default = ['ios', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client.startswith('android'):
            android_clients.append(client)
        else:
            requested_clients.append(client)
    # Force deprioritization of broken Android clients for format de-duplication
    requested_clients.extend(android_clients)
    if not requested_clients:
        # Copy, so the extend below cannot modify the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first instead of extending the list while
        # a generator is still iterating over it
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3702
5eedc208
SS
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in *pr* if it differs from *video_id*, else None"""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    pr_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    if pr_id != video_id:
        return pr_id
    return None
3708
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each requested client.

    @param clients        Client names to query, in order of preference
    @param video_id       ID the player responses are expected to describe
    @param webpage        Watch page contents (may be falsy); used to find the
                          embedded initial player response and the player URL
    @param master_ytcfg   ytcfg used for the "web" client and as a fallback
    @param smuggled_data  Passed through to `_extract_player_response`
    @returns              Tuple `(player_responses, player_url)`
    @raises ExtractorError  If no usable player response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed copy so that `clients.pop()` below yields the clients in their
    # original order, while `append_client` can still queue extra ones
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                # Stop at the first known client, whether or not it was newly queued
                return

    tried_iframe_fallback = False
    player_url = None
    # Maps client name -> video ID of the (wrong) player response it returned
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL has been found, it is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the "web" client, the response embedded in the webpage is reused when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
11f9be09 3792
4d37720a
L
3793 def _needs_live_processing(self, live_status, duration):
3794 if (live_status == 'is_live' and self.get_param('live_from_start')
d949c10c 3795 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
4d37720a
L
3796 return live_status
3797
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Yield the format dicts of a video, followed by one final subtitles dict.

    This is a generator: every item but the last is a format dict built from
    the `formats`/`adaptiveFormats` entries or from the HLS/DASH manifests of
    `streaming_data`; the very last item yielded is the merged subtitles dict.

    @param streaming_data  List of `streamingData` dicts, one per player response
    @param video_id        Video ID, used in messages and for signature decryption
    @param player_url      URL of the JS player (may be None); needed to decrypt
                           signatures and the `n` parameter
    @param live_status     Live status string of the video (or None)
    @param duration        Video duration in seconds (or None)
    """
    CHUNK_SIZE = 10 << 20
    # itags: itag -> set of (protocol, language) already emitted
    # stream_ids: (itag, audio track id, isDrc) tuples already emitted
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # Lazily split the file into CHUNK_SIZE byte ranges, one fragment per range
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
            }),
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be missing or explicitly null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&{}={}'.format(
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url),
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url),
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked as throttled
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # Android client formats are broken due to integrity check enforcement
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
        is_broken = client_name and client_name.startswith(short_client_name('android'))
        if is_broken:
            self.report_warning(
                f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
                'They will be deprioritized', only_once=True)

        # `quality` is None when the format carries no quality information at all
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize broken, damaged and 3gp formats
            'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            # Same format, but downloaded as fixed-size byte-range fragments
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Normalizes a manifest format in place; returns False if it is a
        # duplicate that should be dropped, True if it should be yielded
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the last item yielded
    yield subtitles
11f9be09 4057
def _extract_storyboard(self, player_responses, duration):
    """Yield one "mhtml" storyboard format dict per level of the storyboard spec.

    The spec string is taken from the first player response that has one.
    It is `|`-separated: the first element is the base URL template and each
    following element describes one storyboard level as 8 `#`-separated fields.

    @param player_responses  Player responses to search for the storyboard spec
    @param duration          Video duration in seconds. Nothing is yielded when
                             it is unknown or zero, since the frame rate and the
                             fragment durations are computed by dividing by it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the base URL (first element of the spec)
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Without a positive duration, the divisions below would raise
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
4095
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns the tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` is None when "webpage" is listed in the `player_skip`
    extractor argument.
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            webpage_query['pp'] = player_params
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
4113
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    Returns the tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    any_without_drm = any(not fmt.get('has_drm') for fmt in formats)
    if not any_without_drm:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
4136
4137 def _real_extract(self, url):
4138 url, smuggled_data = unsmuggle_url(url, {})
4139 video_id = self._match_id(url)
4140
4141 base_url = self.http_scheme() + '//www.youtube.com/'
4142 webpage_url = base_url + 'watch?v=' + video_id
4143
4144 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4145
11f9be09 4146 playability_statuses = traverse_obj(
6839ae1f 4147 player_responses, (..., 'playabilityStatus'), expected_type=dict)
11f9be09 4148
4149 trailer_video_id = get_first(
4150 playability_statuses,
4151 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4152 expected_type=str)
4153 if trailer_video_id:
4154 return self.url_result(
4155 trailer_video_id, self.ie_key(), trailer_video_id)
4156
4157 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4158 if webpage else (lambda x: None))
4159
6839ae1f 4160 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
11f9be09 4161 microformats = traverse_obj(
4162 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 4163 expected_type=dict)
c26f9b99 4164
4165 translated_title = self._get_text(microformats, (..., 'title'))
4166 video_title = (self._preferred_lang and translated_title
4167 or get_first(video_details, 'title') # primary
4168 or translated_title
4169 or search_meta(['og:title', 'twitter:title', 'title']))
4170 translated_description = self._get_text(microformats, (..., 'description'))
4171 original_description = get_first(video_details, 'shortDescription')
4172 video_description = (
4173 self._preferred_lang and translated_description
4174 # If original description is blank, it will be an empty string.
4175 # Do not prefer translated description in this case.
4176 or original_description if original_description is not None else translated_description)
11f9be09 4177
d89257f3 4178 multifeed_metadata_list = get_first(
4179 player_responses,
4180 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4181 expected_type=str)
4182 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4183 if self.get_param('noplaylist'):
add96eb9 4184 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
d89257f3 4185 else:
4186 entries = []
4187 feed_ids = []
4188 for feed in multifeed_metadata_list.split(','):
4189 # Unquote should take place before split on comma (,) since textual
4190 # fields may contain comma as well (see
4191 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 4192 feed_data = urllib.parse.parse_qs(
ac668111 4193 urllib.parse.unquote_plus(feed))
d89257f3 4194
4195 def feed_entry(name):
4196 return try_get(
14f25df2 4197 feed_data, lambda x: x[name][0], str)
d89257f3 4198
4199 feed_id = feed_entry('id')
4200 if not feed_id:
4201 continue
4202 feed_title = feed_entry('title')
4203 title = video_title
4204 if feed_title:
add96eb9 4205 title += f' ({feed_title})'
d89257f3 4206 entries.append({
4207 '_type': 'url_transparent',
4208 'ie_key': 'Youtube',
4209 'url': smuggle_url(
add96eb9 4210 '{}watch?v={}'.format(base_url, feed_data['id'][0]),
d89257f3 4211 {'force_singlefeed': True}),
4212 'title': title,
4213 })
4214 feed_ids.append(feed_id)
4215 self.to_screen(
add96eb9 4216 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
4217 ', '.join(feed_ids), video_id))
d89257f3 4218 return self.playlist_result(
4219 entries, video_id, video_title, video_description)
11f9be09 4220
9da6612b 4221 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4222 or int_or_none(get_first(microformats, 'lengthSeconds'))
4223 or parse_duration(search_meta('duration')) or None)
a1b2d843 4224
4d37720a
L
4225 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4226 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4227 if live_status == 'post_live':
4228 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
bf1317d2 4229
545cc85d 4230 if not formats:
11f9be09 4231 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 4232 self.report_drm(video_id)
11f9be09 4233 pemr = get_first(
4234 playability_statuses,
4235 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4236 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4237 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 4238 if subreason:
545cc85d 4239 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 4240 countries = get_first(microformats, 'availableCountries')
545cc85d 4241 if not countries:
4242 regions_allowed = search_meta('regionsAllowed')
4243 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 4244 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 4245 reason += f'. {subreason}'
545cc85d 4246 if reason:
b7da73eb 4247 self.raise_no_formats(reason, expected=True)
bf1317d2 4248
11f9be09 4249 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 4250 if not keywords and webpage:
4251 keywords = [
4252 unescapeHTML(m.group('content'))
4253 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4254 for keyword in keywords:
4255 if keyword.startswith('yt:stretch='):
201c1459 4256 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4257 if mobj:
4258 # NB: float is intentional for forcing float division
4259 w, h = (float(v) for v in mobj.groups())
4260 if w > 0 and h > 0:
4261 ratio = w / h
4262 for f in formats:
4263 if f.get('vcodec') != 'none':
4264 f['stretched_ratio'] = ratio
4265 break
a709d873 4266 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 4267 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4268 if thumbnail_url:
4269 thumbnails.append({
4270 'url': thumbnail_url,
ff2751ac 4271 })
fccf5021 4272 original_thumbnails = thumbnails.copy()
4273
0ba692ac 4274 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 4275 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 4276 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 4277 thumbnail_names = [
962ffcf8 4278 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 4279 # in resolution, these are not the custom thumbnail. So de-prioritize them
4280 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
add96eb9 4281 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
cca80fe6 4282 ]
cca80fe6 4283 n_thumbnail_names = len(thumbnail_names)
0ba692ac 4284 thumbnails.extend({
4285 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4286 video_id=video_id, name=name, ext=ext,
4d37720a 4287 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
cca80fe6 4288 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 4289 for thumb in thumbnails:
cca80fe6 4290 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 4291 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 4292 self._remove_duplicate_formats(thumbnails)
fccf5021 4293 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 4294
7ea65411 4295 category = get_first(microformats, 'category') or search_meta('genre')
7666b936 4296 channel_id = self.ucid_or_none(str_or_none(
7ea65411 4297 get_first(video_details, 'channelId')
4298 or get_first(microformats, 'externalChannelId')
7666b936 4299 or search_meta('channelId')))
7ea65411 4300 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4301
adbc4ec4
THD
4302 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4303 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4304 if not duration and live_end_time and live_start_time:
4305 duration = live_end_time - live_start_time
4306
4d37720a
L
4307 needs_live_processing = self._needs_live_processing(live_status, duration)
4308
4309 def is_bad_format(fmt):
4310 if needs_live_processing and not fmt.get('is_from_start'):
4311 return True
4312 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4313 and fmt.get('protocol') == 'http_dash_segments'):
4314 return True
4315
4316 for fmt in filter(is_bad_format, formats):
4317 fmt['preference'] = (fmt.get('preference') or -1) - 10
d949c10c 4318 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
4d37720a
L
4319
4320 if needs_live_processing:
4321 self._prepare_live_from_start_formats(
4322 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
7ea65411 4323
720c3099 4324 formats.extend(self._extract_storyboard(player_responses, duration))
4325
7666b936 4326 channel_handle = self.handle_from_url(owner_profile_url)
4327
545cc85d 4328 info = {
4329 'id': video_id,
39ca3b5c 4330 'title': video_title,
545cc85d 4331 'formats': formats,
4332 'thumbnails': thumbnails,
fccf5021 4333 # The best thumbnail that we are sure exists. Prevents unnecessary
4334 # URL checking if user don't care about getting the best possible thumbnail
4335 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 4336 'description': video_description,
545cc85d 4337 'channel_id': channel_id,
7666b936 4338 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
545cc85d 4339 'duration': duration,
4340 'view_count': int_or_none(
11f9be09 4341 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 4342 or search_meta('interactionCount')),
11f9be09 4343 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 4344 'age_limit': 18 if (
11f9be09 4345 get_first(microformats, 'isFamilySafe') is False
545cc85d 4346 or search_meta('isFamilyFriendly') == 'false'
4347 or search_meta('og:restrictions:age') == '18+') else 0,
4348 'webpage_url': webpage_url,
4349 'categories': [category] if category else None,
4350 'tags': keywords,
11f9be09 4351 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4d37720a 4352 'live_status': live_status,
adbc4ec4 4353 'release_timestamp': live_start_time,
9f14daf2 4354 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
add96eb9 4355 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
545cc85d 4356 }
b477fc13 4357
c646d76f 4358 subtitles = {}
3944e7af 4359 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 4360 if pctr:
ecdc9049 4361 def get_lang_code(track):
4362 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4363 or track.get('languageCode'))
4364
4365 # Converted into dicts to remove duplicates
4366 captions = {
4367 get_lang_code(sub): sub
6839ae1f 4368 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
ecdc9049 4369 translation_languages = {
4370 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
6839ae1f 4371 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
ecdc9049 4372
774d79cc 4373 def process_language(container, base_url, lang_code, sub_name, query):
120916da 4374 lang_subs = container.setdefault(lang_code, [])
545cc85d 4375 for fmt in self._SUBTITLE_FORMATS:
4376 query.update({
4377 'fmt': fmt,
4378 })
4379 lang_subs.append({
4380 'ext': fmt,
60f393e4 4381 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 4382 'name': sub_name,
545cc85d 4383 })
7e72694b 4384
07b47084 4385 # NB: Constructing the full subtitle dictionary is slow
4386 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4387 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 4388 for lang_code, caption_track in captions.items():
4389 base_url = caption_track.get('baseUrl')
1235d333 4390 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 4391 if not base_url:
4392 continue
ecdc9049 4393 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 4394 if caption_track.get('kind') != 'asr':
545cc85d 4395 if not lang_code:
4396 continue
4397 process_language(
ecdc9049 4398 subtitles, base_url, lang_code, lang_name, {})
4399 if not caption_track.get('isTranslatable'):
4400 continue
3944e7af 4401 for trans_code, trans_name in translation_languages.items():
4402 if not trans_code:
545cc85d 4403 continue
1235d333 4404 orig_trans_code = trans_code
71eb82d1 4405 if caption_track.get('kind') != 'asr' and trans_code != 'und':
07b47084 4406 if not get_translated_subs:
18e49408 4407 continue
ecdc9049 4408 trans_code += f'-{lang_code}'
a70635b8 4409 trans_name += format_field(lang_name, None, ' from %s')
1235d333 4410 if lang_code == f'a-{orig_trans_code}':
ff9b0e07 4411 # Set audio language based on original subtitles
4412 for f in formats:
4413 if f.get('acodec') != 'none' and not f.get('language'):
4414 f['language'] = orig_trans_code
4415 # Add an "-orig" label to the original language so that it can be distinguished.
4416 # The subs are returned without "-orig" as well for compatibility
0c8d9e5f 4417 process_language(
d49669ac 4418 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4419 # Setting tlang=lang returns damaged subtitles.
d49669ac 4420 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 4421 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 4422
4423 info['automatic_captions'] = automatic_captions
4424 info['subtitles'] = subtitles
7e72694b 4425
14f25df2 4426 parsed_url = urllib.parse.urlparse(url)
545cc85d 4427 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 4428 query = urllib.parse.parse_qs(component)
545cc85d 4429 for k, v in query.items():
4430 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4431 d_k += '_time'
4432 if d_k not in info and k in s_ks:
add96eb9 4433 info[d_k] = parse_duration(v[0])
822b9d9c
RA
4434
4435 # Youtube Music Auto-generated description
71dc18fa
BT
4436 if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
4437 # XXX: Causes catastrophic backtracking if description has "·"
4438 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4439 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4440 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
1890fc63 4441 mobj = re.search(
4442 r'''(?xs)
71dc18fa
BT
4443 (?=(?P<track>[^\n·]+))(?P=track)·
4444 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4445 (?=(?P<album>[^\n]+))(?P=album)\n
1890fc63 4446 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4447 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
71dc18fa
BT
4448 (.+?\nArtist\s*:\s*
4449 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4450 )?.+\nAuto-generated\ by\ YouTube\.\s*$
1890fc63 4451 ''', video_description)
822b9d9c 4452 if mobj:
822b9d9c
RA
4453 release_year = mobj.group('release_year')
4454 release_date = mobj.group('release_date')
4455 if release_date:
4456 release_date = release_date.replace('-', '')
4457 if not release_year:
545cc85d 4458 release_year = release_date[:4]
4459 info.update({
4460 'album': mobj.group('album'.strip()),
104a7b5a
L
4461 'artists': ([a] if (a := mobj.group('clean_artist'))
4462 else [a.strip() for a in mobj.group('artist').split('·')]),
545cc85d 4463 'track': mobj.group('track').strip(),
4464 'release_date': release_date,
cc2db878 4465 'release_year': int_or_none(release_year),
545cc85d 4466 })
7e72694b 4467
545cc85d 4468 initial_data = None
4469 if webpage:
56ba69e4 4470 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
607510b9 4471 if not traverse_obj(initial_data, 'contents'):
4472 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4473 initial_data = None
545cc85d 4474 if not initial_data:
99e9e001 4475 query = {'videoId': video_id}
4476 query.update(self._get_checkok_params())
109dd3b2 4477 initial_data = self._extract_response(
4478 item_id=video_id, ep='next', fatal=False,
607510b9 4479 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
99e9e001 4480 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4481 note='Downloading initial data API JSON')
545cc85d 4482
0df111a3 4483 info['comment_count'] = traverse_obj(initial_data, (
4484 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
add96eb9 4485 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
0df111a3 4486 ), (
4487 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
add96eb9 4488 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
071670cb 4489 ), expected_type=self._get_count, get_all=False)
0df111a3 4490
19a03940 4491 try: # This will error if there is no livechat
c60ee3a2 4492 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4493 except (KeyError, IndexError, TypeError):
4494 pass
4495 else:
ecdc9049 4496 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4497 # url is needed to set cookies
4498 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4499 'video_id': video_id,
4500 'ext': 'json',
4d37720a
L
4501 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4502 else 'youtube_live_chat_replay'),
c60ee3a2 4503 }]
545cc85d 4504
4505 if initial_data:
7c365c21 4506 info['chapters'] = (
4507 self._extract_chapters_from_json(initial_data, duration)
4508 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4509 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4510 or None)
545cc85d 4511
03e85ea9 4512 info['heatmap'] = self._extract_heatmap(initial_data)
5caf30db 4513
17322130 4514 contents = traverse_obj(
4515 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4516 expected_type=list, default=[])
4517
4518 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4519 if vpir:
4520 stl = vpir.get('superTitleLink')
4521 if stl:
4522 stl = self._get_text(stl)
4523 if try_get(
4524 vpir,
4525 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4526 info['location'] = stl
4527 else:
affc4fef 4528 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4529 if mobj:
545cc85d 4530 info.update({
17322130 4531 'series': mobj.group(1),
4532 'season_number': int(mobj.group(2)),
4533 'episode_number': int(mobj.group(3)),
545cc85d 4534 })
17322130 4535 for tlb in (try_get(
4536 vpir,
4537 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4538 list) or []):
3ffb2f5b 4539 tbrs = variadic(
4540 traverse_obj(
6839ae1f
SS
4541 tlb, ('toggleButtonRenderer', ...),
4542 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
3ffb2f5b 4543 for tbr in tbrs:
4544 for getter, regex in [(
4545 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4546 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4547 lambda x: x['accessibility'],
4548 lambda x: x['accessibilityData']['accessibilityData'],
4549 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4550 label = (try_get(tbr, getter, dict) or {}).get('label')
4551 if label:
4552 mobj = re.match(regex, label)
4553 if mobj:
4554 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4555 break
6b5d93b0
PG
4556
4557 info['like_count'] = traverse_obj(vpir, (
4558 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4559 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4560 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4561 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
4562
867c66ff
M
4563 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4564 if vcr:
4565 vc = self._get_count(vcr, 'viewCount')
4566 # Upcoming premieres with waiting count are treated as live here
4567 if vcr.get('isLive'):
4568 info['concurrent_view_count'] = vc
4569 elif info.get('view_count') is None:
4570 info['view_count'] = vc
4571
17322130 4572 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4573 if vsir:
4574 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4575 info.update({
4576 'channel': self._get_text(vor, 'title'),
4577 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4578
7666b936 4579 if not channel_handle:
4580 channel_handle = self.handle_from_url(
4581 traverse_obj(vor, (
4582 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4583 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4584 {str}), get_all=False))
4585
17322130 4586 rows = try_get(
4587 vsir,
4588 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4589 list) or []
4590 multiple_songs = False
4591 for row in rows:
4592 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4593 multiple_songs = True
4594 break
4595 for row in rows:
4596 mrr = row.get('metadataRowRenderer') or {}
4597 mrr_title = mrr.get('title')
4598 if not mrr_title:
4599 continue
4600 mrr_title = self._get_text(mrr, 'title')
4601 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4602 if mrr_title == 'License':
4603 info['license'] = mrr_contents_text
4604 elif not multiple_songs:
4605 if mrr_title == 'Album':
4606 info['album'] = mrr_contents_text
4607 elif mrr_title == 'Artist':
104a7b5a 4608 info['artists'] = [mrr_contents_text] if mrr_contents_text else None
17322130 4609 elif mrr_title == 'Song':
4610 info['track'] = mrr_contents_text
8213ce28 4611 owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
4612 if self._has_badge(owner_badges, BadgeType.VERIFIED):
4613 info['channel_is_verified'] = True
545cc85d 4614
7666b936 4615 info.update({
4616 'uploader': info.get('channel'),
4617 'uploader_id': channel_handle,
4618 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4619 })
96a134de 4620
4621 # We only want timestamp IF it has time precision AND a timezone
4622 # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
4623 timestamp = (
4624 parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
4625 or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
4626 )
4627 upload_date = (
4628 dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
4629 (
4630 unified_strdate(get_first(microformats, 'uploadDate'))
4631 or unified_strdate(search_meta('uploadDate'))
4632 ))
4633
4634 # In the case we cannot get the timestamp:
17322130 4635 # The upload date for scheduled, live and past live streams / premieres in microformats
4636 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4637 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
96a134de 4638 if not upload_date or (not timestamp and live_status in ('not_live', None)):
4639 # this should be in UTC, as configured in the cookie/client context
c26f9b99 4640 upload_date = strftime_or_none(
ad54c913 4641 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
96a134de 4642
17322130 4643 info['upload_date'] = upload_date
96a134de 4644 info['timestamp'] = timestamp
992f9a73 4645
ef79d20d 4646 if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
4647 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
c305a25c 4648 upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
bb5a54e6 4649 if upload_datetime >= datetime_from_str('today-2days'):
ef79d20d 4650 for fmt in info['formats']:
4651 if fmt.get('protocol') == 'm3u8_native':
4652 fmt['__needs_testing'] = True
4653
104a7b5a 4654 for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
545cc85d 4655 v = info.get(s_k)
4656 if v:
4657 info[d_k] = v
b84071c0 4658
14a14335 4659 badges = self._extract_badges(traverse_obj(vpir, 'badges'))
c26f9b99 4660
4661 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4662 or get_first(video_details, 'isPrivate', expected_type=bool))
4663
4664 info['availability'] = (
4665 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4666 else self._availability(
4667 is_private=is_private,
4668 needs_premium=(
4669 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4670 or False if initial_data and is_private is not None else None),
4671 needs_subscription=(
4672 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4673 or False if initial_data and is_private is not None else None),
4674 needs_auth=info['age_limit'] >= 18,
4675 is_unlisted=None if is_private is None else (
4676 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4677 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4678
a2160aa4 4679 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4680
11f9be09 4681 self.mark_watched(video_id, player_responses)
d77ab8e2 4682
545cc85d 4683 return info
c5e8d7af 4684
a61fd4cf 4685
a6213a49 4686class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
182bda88 4687 @staticmethod
def passthrough_smuggled_data(func):
    """Decorate ``func(self, url, smuggled_data)`` so smuggled data survives redirection.

    The returned wrapper takes ``(self, url)``: it unsmuggles the URL, flags
    music URLs with ``is_music_url``, calls ``func`` and then re-smuggles
    whatever is left in ``smuggled_data`` into the result and its entries.
    Only results of type ``url`` / ``url_transparent`` are modified.
    """
    redirect_types = ('url', 'url_transparent')
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Anything that is not a redirect has no URL to carry the data further
        if info.get('_type') in redirect_types:
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in youtube_hosts:
                    # The music host itself now encodes the flag, so drop it from the payload
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        # NB: this call may pop 'is_music_url' from the shared dict before the entries are processed
        _smuggle(result, smuggled_data)
        entries = result.get('entries')
        if entries:
            # Lazy on purpose: every entry receives its own copy, taken when the entry is consumed
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in entries)
        return result
    return wrapper
4713
8bdd16b4 4714 @staticmethod
cd7c66cf 4715 def _extract_basic_item_renderer(item):
4716 # Modified from _extract_grid_item_renderer
201c1459 4717 known_basic_renderers = (
add96eb9 4718 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
cd7c66cf 4719 )
4720 for key, renderer in item.items():
201c1459 4721 if not isinstance(renderer, dict):
cd7c66cf 4722 continue
201c1459 4723 elif key in known_basic_renderers:
4724 return renderer
4725 elif key.startswith('grid') and key.endswith('Renderer'):
4726 return renderer
8bdd16b4 4727
def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result (handled by YoutubeTabIE) for a channel renderer.

    ``renderer['channelId']`` is accessed directly, so a renderer without
    that key raises KeyError.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    handle_source_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(handle_source_url)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    ie_key = YoutubeTabIE.ie_key()
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)

    # Because of the above, search channel renderers carry the subscriber text in
    # videoCountText, whereas feed/channels fills subscriberCountText correctly
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count; only trust it as a video count
    # when subscriberCountText itself parses as a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    description = self._get_text(renderer, 'descriptionSnippet')

    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    # Absent badge is reported as "unknown" (None) rather than False
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': ie_key,
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': is_verified,
    }
4765
8bdd16b4 4766 def _grid_entries(self, grid_renderer):
4767 for item in grid_renderer['items']:
4768 if not isinstance(item, dict):
39b62db1 4769 continue
cd7c66cf 4770 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4771 if not isinstance(renderer, dict):
4772 continue
052e1350 4773 title = self._get_text(renderer, 'title')
fe93e2c4 4774
8bdd16b4 4775 # playlist
4776 playlist_id = renderer.get('playlistId')
4777 if playlist_id:
4778 yield self.url_result(
add96eb9 4779 f'https://www.youtube.com/playlist?list={playlist_id}',
8bdd16b4 4780 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4781 video_title=title)
201c1459 4782 continue
8bdd16b4 4783 # video
4784 video_id = renderer.get('videoId')
4785 if video_id:
4786 yield self._extract_video(renderer)
201c1459 4787 continue
8bdd16b4 4788 # channel
4789 channel_id = renderer.get('channelId')
4790 if channel_id:
c7335551 4791 yield self._extract_channel_renderer(renderer)
201c1459 4792 continue
4793 # generic endpoint URL support
4794 ep_url = urljoin('https://www.youtube.com/', try_get(
4795 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4796 str))
201c1459 4797 if ep_url:
4798 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4799 if ie.suitable(ep_url):
4800 yield self.url_result(
4801 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4802 break
8bdd16b4 4803
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a music.youtube.com URL result.

    Checked in order: a playlist item video, a watch endpoint playlist
    (optionally with a video), then a browse endpoint. Returns None when the
    renderer contains none of them.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        # Either way the result is a playlist handled by the tab extractor
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4824
3d3dddc9 4825 def _shelf_entries_from_content(self, shelf_renderer):
4826 content = shelf_renderer.get('content')
4827 if not isinstance(content, dict):
8bdd16b4 4828 return
cd7c66cf 4829 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4830 if renderer:
4831 # TODO: add support for nested playlists so each shelf is processed
4832 # as separate playlist
4833 # TODO: this includes only first N items
86e5f3ed 4834 yield from self._grid_entries(renderer)
3d3dddc9 4835 renderer = content.get('horizontalListRenderer')
4836 if renderer:
add96eb9 4837 # TODO: handle case
3d3dddc9 4838 pass
8bdd16b4 4839
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own endpoint, then the entries in its content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not have a URL of its own, so the embedded content is always read as well
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4855
8bdd16b4 4856 def _playlist_entries(self, video_list_renderer):
4857 for content in video_list_renderer['contents']:
4858 if not isinstance(content, dict):
4859 continue
4860 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4861 if not isinstance(renderer, dict):
4862 continue
4863 video_id = renderer.get('videoId')
4864 if not video_id:
4865 continue
4866 yield self._extract_video(renderer)
07aeced6 4867
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (a video, else a playlist) for a rich item renderer."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif playlist_id := renderer.get('playlistId'):
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
3462ffa8 4883
8bdd16b4 4884 def _video_entry(self, video_renderer):
4885 video_id = video_renderer.get('videoId')
4886 if video_id:
4887 return self._extract_video(video_renderer)
dacb3a86 4888
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no usable tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4895
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    URL linked from the post text, except a link to the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            # Already yielded as the attachment above
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4931
8bdd16b4 4932 def _post_thread_continuation_entries(self, post_thread_continuation):
4933 contents = post_thread_continuation.get('contents')
4934 if not isinstance(contents, list):
4935 return
4936 for content in contents:
4937 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4938 if isinstance(renderer, dict):
4939 yield from self._post_thread_entries(renderer)
8bdd16b4 4940 continue
6b0b0a28 4941 renderer = content.get('videoRenderer')
4942 if isinstance(renderer, dict):
4943 yield self._video_entry(renderer)
07aeced6 4944
39ed931e 4945 r''' # unused
4946 def _rich_grid_entries(self, contents):
4947 for content in contents:
4948 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4949 if video_renderer:
4950 entry = self._video_entry(video_renderer)
4951 if entry:
4952 yield entry
4953 '''
52efa4b3 4954
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link found in the report history table cells."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4961
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    This is a generator with an out-parameter: ``continuation_list`` is
    reset to ``[None]`` in place and ``continuation_list[0]`` is updated
    while iterating, so the caller must exhaust the generator before
    reading the continuation from it.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # Section-like container; presumably the first of these keys that holds a dict
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: only rich items and the report history table are handled directly
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries for that renderer
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                # Recursive call: it resets and refills the same continuation_list
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Several handlers above may produce None
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the continuation of the enclosing section ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to that of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
5015
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. Further
    pages are then requested until there is no continuation left, a
    continuation token repeats, a request returns nothing, or a page
    contains no renderer known to this method.
    """
    continuation_list = [None]
    # The lambda looks up the *name* continuation_list on each call, so the
    # rebinding inside the loop below is seen by later calls
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Key found in the continuation data -> (handler, key to wrap the data under).
        # With a wrapper key of None the continuation data is passed to the handler as is
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        # NOTE(review): presumably the first element when continuation_items is a list,
        # or continuation_items itself when it is already a dict - confirm against traverse_obj
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list, so a continuation left over from the previous page is not reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer on this page: stop paginating
        if not video_items_renderer:
            break
9558dcec 5083
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the first tab renderer in `tabs` whose 'selected' value is truthy.

    If none is selected, raise ExtractorError when `fatal` is set,
    otherwise return None.
    """
    selected = next((tab for tab in tabs if tab.get('selected')), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected
5091
@staticmethod
def _extract_tab_renderers(response):
    """Collect every dict-valued tabRenderer/expandableTabRenderer of the two-column browse results."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
b82f815f 5096
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build a playlist result from the selected tab of a tabbed page.

    The tab's 'title' and 'expandedText' (when present) are appended to the
    playlist title, each prefixed with ' - '.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for suffix_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, suffix_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
39ed931e 5110
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Assemble the playlist-level info dict (id, title, channel/uploader
    fields, thumbnails, counts, ...) from a tabbed page response.

    @param item_id: fallback id, replaced by the channel id when one is found
    @param data: response
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(
            metadata_renderer,
            ('externalId', {self.ucid_or_none}),
            ('channelUrl', {self.ucid_from_url}))
        info['channel'] = metadata_renderer.get('title')
        info['channel_id'] = channel_id
        if channel_id:
            info['id'] = channel_id
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    title = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    availability = self._extract_availability(data)
    follower_count = self._get_count(data, ('header', ..., 'subscriberCountText'))
    description = try_get(metadata_renderer, lambda x: x.get('description', ''))
    tags = (
        traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
        or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...)))
    info.update({
        'title': title,
        'availability': availability,
        'channel_follower_count': follower_count,
        'description': description,
        'tags': tags,
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = traverse_obj(
        metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
    if not channel_handle:
        channel_handle = traverse_obj(
            data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False)

    if channel_handle:
        info['uploader_id'] = channel_handle
        info['uploader_url'] = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text))

    # Each fallback is only consulted while the count is still None (0 is allowed)
    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            secondary_sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer')
            owner = traverse_obj(secondary_sidebar, ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
73c4ac2c 5233
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a playlist panel, requesting further pages from the
    'next' endpoint until a page brings no videos beyond the last one seen.

    @param playlist: playlist panel renderer holding the first page
    @param playlist_id: id sent as 'playlistId' in each follow-up query
    @param data: initial response, used for account sync id and visitor data
    @param ytcfg: used to build the API headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # The lookup at the end of the loop gives None when the response has
        # no playlist panel; stop instead of handing that to _playlist_entries
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item is not guaranteed to be a video renderer with a
        # watch endpoint; fall back to {} so the defaults below are used
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 5264
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return a result for a playlist panel: a url_result pointing at the
    playlist's own page when it has one that differs from `url`, otherwise
    a playlist_result built from the inline panel.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
c5e8d7af 5287
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    selected_privacy_icon_path = (
        'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
        lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType')
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer), selected_privacy_icon_path, expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # Source precedence: header privacy (together with the badge), then the
    # dropdown icon, then (for unlisted only) the microformat flag.
    # The badge is only consulted when the header privacy is present.
    if player_header_privacy is not None:
        is_private = (
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or player_header_privacy == 'PRIVATE')
    elif privacy_setting_icon is not None:
        is_private = privacy_setting_icon == 'PRIVACY_PRIVATE'
    else:
        is_private = None

    if player_header_privacy is not None:
        is_unlisted = (
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or player_header_privacy == 'UNLISTED')
    elif privacy_setting_icon is not None:
        is_unlisted = privacy_setting_icon == 'PRIVACY_UNLISTED'
    else:
        # None when the microformat carries no boolean 'unlisted' flag
        is_unlisted = microformats_is_unlisted

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 5328
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry of type `expected_type`
    among the playlist sidebar items, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
5337
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without requesting anything when `data` has neither
    playlist metadata nor a playlist header.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}',
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5357
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTabIE 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5361
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying through
    RetryManager on network errors (other than HTTP 403/429) and on
    incomplete initial data.

    @returns (webpage, data); either may be None
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # HTTP 403/429 and non-network failures are reported, not retried
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, HTTPError) and cause.status in (403, 429))
            if retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None

    return webpage, data
5390
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant for authenticated sessions that have no ytcfg
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
5402
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the page data for `url`, from the webpage unless it is skipped,
    falling back to the API when the webpage yielded no data.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        if (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])
        ):
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5421
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and request
    the browse or next endpoint it points to.

    Raises ExtractorError when nothing resolves and `fatal` is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), tried in this order
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5439
# Default search 'params' value, used when _search_results is called without `params`
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, requesting further pages for
    as long as a continuation is found.

    @param params: search 'params' value; defaults to self._SEARCH_PARAMS
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list inside a search response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({keys[0] for keys in content_keys})
    display_id = f'query "{query}"'

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the previous page's continuation (if any) into the query
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
5474
5475
5476class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5477 IE_DESC = 'YouTube Tabs'
5478 _VALID_URL = r'''(?x:
5479 https?://
b032ff0f 5480 (?!consent\.)(?:\w+\.)?
a6213a49 5481 (?:
5482 youtube(?:kids)?\.com|
add96eb9 5483 {invidious}
a6213a49 5484 )/
5485 (?:
5486 (?P<channel_type>channel|c|user|browse)/|
5487 (?P<not_channel>
5488 feed/|hashtag/|
5489 (?:playlist|watch)\?.*?\blist=
5490 )|
add96eb9 5491 (?!(?:{reserved_names})\b) # Direct URLs
a6213a49 5492 )
5493 (?P<id>[^/?\#&]+)
add96eb9 5494 )'''.format(
5495 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5496 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5497 )
a6213a49 5498 IE_NAME = 'youtube:tab'
5499
5500 _TESTS = [{
5501 'note': 'playlists, multipage',
5502 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5503 'playlist_mincount': 94,
5504 'info_dict': {
5505 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5506 'title': 'Igor Kleiner Ph.D. - Playlists',
5507 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5508 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5509 'uploader_id': '@IgorDataScience',
5510 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5511 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5512 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5513 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5514 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
add96eb9 5515 'channel_follower_count': int,
a6213a49 5516 },
5517 }, {
5518 'note': 'playlists, multipage, different order',
5519 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5520 'playlist_mincount': 94,
5521 'info_dict': {
5522 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5523 'title': 'Igor Kleiner Ph.D. - Playlists',
5524 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5525 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5526 'uploader_id': '@IgorDataScience',
5527 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5528 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5529 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5530 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5531 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
add96eb9 5532 'channel_follower_count': int,
a6213a49 5533 },
5534 }, {
5535 'note': 'playlists, series',
5536 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5537 'playlist_mincount': 5,
5538 'info_dict': {
5539 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5540 'title': '3Blue1Brown - Playlists',
8828f457 5541 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5542 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5543 'channel': '3Blue1Brown',
5544 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5545 'uploader_id': '@3blue1brown',
5546 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5547 'uploader': '3Blue1Brown',
976ae3ea 5548 'tags': ['Mathematics'],
14a14335 5549 'channel_follower_count': int,
8213ce28 5550 'channel_is_verified': True,
a6213a49 5551 },
5552 }, {
5553 'note': 'playlists, singlepage',
5554 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5555 'playlist_mincount': 4,
5556 'info_dict': {
5557 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5558 'title': 'ThirstForScience - Playlists',
5559 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5560 'uploader': 'ThirstForScience',
7666b936 5561 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5562 'uploader_id': '@ThirstForScience',
976ae3ea 5563 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
7666b936 5564 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
8828f457 5565 'tags': 'count:12',
976ae3ea 5566 'channel': 'ThirstForScience',
add96eb9 5567 'channel_follower_count': int,
5568 },
a6213a49 5569 }, {
5570 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5571 'only_matching': True,
5572 }, {
5573 'note': 'basic, single video playlist',
5574 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5575 'info_dict': {
a6213a49 5576 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5577 'title': 'youtube-dl public playlist',
976ae3ea 5578 'description': '',
5579 'tags': [],
5580 'view_count': int,
5581 'modified_date': '20201130',
5582 'channel': 'Sergey M.',
5583 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
976ae3ea 5584 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5585 'availability': 'public',
7666b936 5586 'uploader': 'Sergey M.',
5587 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5588 'uploader_id': '@sergeym.6173',
a6213a49 5589 },
5590 'playlist_count': 1,
5591 }, {
5592 'note': 'empty playlist',
5593 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5594 'info_dict': {
a6213a49 5595 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5596 'title': 'youtube-dl empty playlist',
976ae3ea 5597 'tags': [],
5598 'channel': 'Sergey M.',
5599 'description': '',
8828f457 5600 'modified_date': '20230921',
976ae3ea 5601 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5602 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8828f457 5603 'availability': 'unlisted',
7666b936 5604 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5605 'uploader_id': '@sergeym.6173',
5606 'uploader': 'Sergey M.',
a6213a49 5607 },
5608 'playlist_count': 0,
5609 }, {
5610 'note': 'Home tab',
5611 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5612 'info_dict': {
5613 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5614 'title': 'lex will - Home',
5615 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5616 'uploader': 'lex will',
7666b936 5617 'uploader_id': '@lexwill718',
976ae3ea 5618 'channel': 'lex will',
5619 'tags': ['bible', 'history', 'prophesy'],
7666b936 5620 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5621 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5622 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
add96eb9 5623 'channel_follower_count': int,
a6213a49 5624 },
5625 'playlist_mincount': 2,
5626 }, {
5627 'note': 'Videos tab',
5628 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5629 'info_dict': {
5630 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5631 'title': 'lex will - Videos',
5632 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5633 'uploader': 'lex will',
7666b936 5634 'uploader_id': '@lexwill718',
976ae3ea 5635 'tags': ['bible', 'history', 'prophesy'],
5636 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5637 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5638 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5639 'channel': 'lex will',
add96eb9 5640 'channel_follower_count': int,
a6213a49 5641 },
5642 'playlist_mincount': 975,
5643 }, {
5644 'note': 'Videos tab, sorted by popular',
5645 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5646 'info_dict': {
5647 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5648 'title': 'lex will - Videos',
5649 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5650 'uploader': 'lex will',
7666b936 5651 'uploader_id': '@lexwill718',
976ae3ea 5652 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5653 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5654 'channel': 'lex will',
5655 'tags': ['bible', 'history', 'prophesy'],
5656 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
add96eb9 5657 'channel_follower_count': int,
a6213a49 5658 },
5659 'playlist_mincount': 199,
5660 }, {
5661 'note': 'Playlists tab',
5662 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5663 'info_dict': {
5664 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5665 'title': 'lex will - Playlists',
5666 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5667 'uploader': 'lex will',
7666b936 5668 'uploader_id': '@lexwill718',
5669 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5670 'channel': 'lex will',
5671 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5672 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5673 'tags': ['bible', 'history', 'prophesy'],
add96eb9 5674 'channel_follower_count': int,
a6213a49 5675 },
5676 'playlist_mincount': 17,
5677 }, {
5678 'note': 'Community tab',
5679 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5680 'info_dict': {
5681 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5682 'title': 'lex will - Community',
5683 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5684 'channel': 'lex will',
5685 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5686 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5687 'tags': ['bible', 'history', 'prophesy'],
7666b936 5688 'channel_follower_count': int,
5689 'uploader_url': 'https://www.youtube.com/@lexwill718',
5690 'uploader_id': '@lexwill718',
5691 'uploader': 'lex will',
a6213a49 5692 },
5693 'playlist_mincount': 18,
5694 }, {
5695 'note': 'Channels tab',
5696 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5697 'info_dict': {
5698 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5699 'title': 'lex will - Channels',
5700 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5701 'channel': 'lex will',
5702 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5703 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5704 'tags': ['bible', 'history', 'prophesy'],
7666b936 5705 'channel_follower_count': int,
5706 'uploader_url': 'https://www.youtube.com/@lexwill718',
5707 'uploader_id': '@lexwill718',
5708 'uploader': 'lex will',
a6213a49 5709 },
5710 'playlist_mincount': 12,
5711 }, {
5712 'note': 'Search tab',
5713 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5714 'playlist_mincount': 40,
5715 'info_dict': {
5716 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5717 'title': '3Blue1Brown - Search - linear algebra',
8828f457 5718 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5719 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5720 'tags': ['Mathematics'],
5721 'channel': '3Blue1Brown',
5722 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5723 'channel_follower_count': int,
5724 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5725 'uploader_id': '@3blue1brown',
5726 'uploader': '3Blue1Brown',
8213ce28 5727 'channel_is_verified': True,
a6213a49 5728 },
5729 }, {
5730 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5731 'only_matching': True,
5732 }, {
5733 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5734 'only_matching': True,
5735 }, {
5736 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5737 'only_matching': True,
5738 }, {
5739 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5740 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5741 'info_dict': {
5742 'title': '29C3: Not my department',
5743 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
a6213a49 5744 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5745 'tags': [],
976ae3ea 5746 'view_count': int,
5747 'modified_date': '20150605',
5748 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
7666b936 5749 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
976ae3ea 5750 'channel': 'Christiaan008',
c26f9b99 5751 'availability': 'public',
7666b936 5752 'uploader_id': '@ChRiStIaAn008',
5753 'uploader': 'Christiaan008',
5754 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
a6213a49 5755 },
5756 'playlist_count': 96,
5757 }, {
5758 'note': 'Large playlist',
5759 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5760 'info_dict': {
5761 'title': 'Uploads from Cauchemar',
5762 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
7666b936 5763 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5764 'tags': [],
5765 'modified_date': r're:\d{8}',
5766 'channel': 'Cauchemar',
976ae3ea 5767 'view_count': int,
5768 'description': '',
5769 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5770 'availability': 'public',
7666b936 5771 'uploader_id': '@Cauchemar89',
5772 'uploader': 'Cauchemar',
5773 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
a6213a49 5774 },
5775 'playlist_mincount': 1123,
976ae3ea 5776 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5777 }, {
5778 'note': 'even larger playlist, 8832 videos',
5779 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5780 'only_matching': True,
5781 }, {
5782 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5783 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5784 'info_dict': {
5785 'title': 'Uploads from Interstellar Movie',
5786 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5787 'tags': [],
5788 'view_count': int,
5789 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
7666b936 5790 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5791 'channel': 'Interstellar Movie',
5792 'description': '',
5793 'modified_date': r're:\d{8}',
c26f9b99 5794 'availability': 'public',
7666b936 5795 'uploader_id': '@InterstellarMovie',
5796 'uploader': 'Interstellar Movie',
5797 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
a6213a49 5798 },
5799 'playlist_mincount': 21,
5800 }, {
5801 'note': 'Playlist with "show unavailable videos" button',
5802 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5803 'info_dict': {
5804 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5805 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5806 'view_count': int,
5807 'channel': 'Phim Siêu Nhân Nhật Bản',
5808 'tags': [],
976ae3ea 5809 'description': '',
5810 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5811 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5812 'modified_date': r're:\d{8}',
c26f9b99 5813 'availability': 'public',
7666b936 5814 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5815 'uploader_id': '@phimsieunhannhatban',
5816 'uploader': 'Phim Siêu Nhân Nhật Bản',
a6213a49 5817 },
5818 'playlist_mincount': 200,
976ae3ea 5819 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5820 }, {
5821 'note': 'Playlist with unavailable videos in page 7',
5822 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5823 'info_dict': {
5824 'title': 'Uploads from BlankTV',
5825 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5826 'channel': 'BlankTV',
7666b936 5827 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5828 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5829 'view_count': int,
5830 'tags': [],
976ae3ea 5831 'modified_date': r're:\d{8}',
5832 'description': '',
c26f9b99 5833 'availability': 'public',
7666b936 5834 'uploader_id': '@blanktv',
5835 'uploader': 'BlankTV',
5836 'uploader_url': 'https://www.youtube.com/@blanktv',
a6213a49 5837 },
5838 'playlist_mincount': 1000,
976ae3ea 5839 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5840 }, {
5841 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5842 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5843 'info_dict': {
5844 'title': 'Data Analysis with Dr Mike Pound',
5845 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
a6213a49 5846 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5847 'tags': [],
5848 'view_count': int,
5849 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
7666b936 5850 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
976ae3ea 5851 'channel': 'Computerphile',
c26f9b99 5852 'availability': 'public',
6141346d 5853 'modified_date': '20190712',
7666b936 5854 'uploader_id': '@Computerphile',
5855 'uploader': 'Computerphile',
5856 'uploader_url': 'https://www.youtube.com/@Computerphile',
a6213a49 5857 },
5858 'playlist_mincount': 11,
5859 }, {
5860 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5861 'only_matching': True,
5862 }, {
5863 'note': 'Playlist URL that does not actually serve a playlist',
5864 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5865 'info_dict': {
5866 'id': 'FqZTN594JQw',
5867 'ext': 'webm',
5868 'title': "Smiley's People 01 detective, Adventure Series, Action",
a6213a49 5869 'upload_date': '20150526',
5870 'license': 'Standard YouTube License',
5871 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5872 'categories': ['People & Blogs'],
5873 'tags': list,
5874 'view_count': int,
5875 'like_count': int,
a6213a49 5876 },
5877 'params': {
5878 'skip_download': True,
5879 },
5880 'skip': 'This video is not available.',
5881 'add_ie': [YoutubeIE.ie_key()],
5882 }, {
5883 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5884 'only_matching': True,
5885 }, {
5886 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5887 'only_matching': True,
5888 }, {
5889 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5890 'info_dict': {
14a14335 5891 'id': 'hGkQjiJLjWQ', # This will keep changing
a6213a49 5892 'ext': 'mp4',
976ae3ea 5893 'title': str,
a6213a49 5894 'upload_date': r're:\d{8}',
976ae3ea 5895 'description': str,
a6213a49 5896 'categories': ['News & Politics'],
5897 'tags': list,
5898 'like_count': int,
86973308 5899 'release_timestamp': int,
976ae3ea 5900 'channel': 'Sky News',
5901 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5902 'age_limit': 0,
5903 'view_count': int,
86973308 5904 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5905 'playable_in_embed': True,
86973308 5906 'release_date': r're:\d+',
976ae3ea 5907 'availability': 'public',
5908 'live_status': 'is_live',
5909 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5910 'channel_follower_count': int,
5911 'concurrent_view_count': int,
7666b936 5912 'uploader_url': 'https://www.youtube.com/@SkyNews',
5913 'uploader_id': '@SkyNews',
5914 'uploader': 'Sky News',
8213ce28 5915 'channel_is_verified': True,
a6213a49 5916 },
5917 'params': {
5918 'skip_download': True,
5919 },
976ae3ea 5920 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5921 }, {
5922 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5923 'info_dict': {
5924 'id': 'a48o2S1cPoo',
5925 'ext': 'mp4',
5926 'title': 'The Young Turks - Live Main Show',
a6213a49 5927 'upload_date': '20150715',
5928 'license': 'Standard YouTube License',
5929 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5930 'categories': ['News & Politics'],
5931 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5932 'like_count': int,
a6213a49 5933 },
5934 'params': {
5935 'skip_download': True,
5936 },
5937 'only_matching': True,
5938 }, {
5939 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5940 'only_matching': True,
5941 }, {
5942 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5943 'only_matching': True,
5944 }, {
5945 'note': 'A channel that is not live. Should raise error',
5946 'url': 'https://www.youtube.com/user/numberphile/live',
5947 'only_matching': True,
5948 }, {
5949 'url': 'https://www.youtube.com/feed/trending',
5950 'only_matching': True,
5951 }, {
5952 'url': 'https://www.youtube.com/feed/library',
5953 'only_matching': True,
5954 }, {
5955 'url': 'https://www.youtube.com/feed/history',
5956 'only_matching': True,
5957 }, {
5958 'url': 'https://www.youtube.com/feed/subscriptions',
5959 'only_matching': True,
5960 }, {
5961 'url': 'https://www.youtube.com/feed/watch_later',
5962 'only_matching': True,
5963 }, {
5964 'note': 'Recommended - redirects to home page.',
5965 'url': 'https://www.youtube.com/feed/recommended',
5966 'only_matching': True,
5967 }, {
5968 'note': 'inline playlist with not always working continuations',
5969 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5970 'only_matching': True,
5971 }, {
5972 'url': 'https://www.youtube.com/course',
5973 'only_matching': True,
5974 }, {
5975 'url': 'https://www.youtube.com/zsecurity',
5976 'only_matching': True,
5977 }, {
5978 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5979 'only_matching': True,
5980 }, {
5981 'url': 'https://www.youtube.com/TheYoungTurks/live',
5982 'only_matching': True,
5983 }, {
5984 'url': 'https://www.youtube.com/hashtag/cctv9',
5985 'info_dict': {
5986 'id': 'cctv9',
8828f457 5987 'title': 'cctv9 - All',
976ae3ea 5988 'tags': [],
a6213a49 5989 },
4dc23a80 5990 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5991 }, {
5992 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5993 'only_matching': True,
5994 }, {
5995 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5996 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
add96eb9 5997 'only_matching': True,
a6213a49 5998 }, {
5999 'note': '/browse/ should redirect to /channel/',
6000 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
add96eb9 6001 'only_matching': True,
a6213a49 6002 }, {
6003 'note': 'VLPL, should redirect to playlist?list=PL...',
6004 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6005 'info_dict': {
6006 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
a6213a49 6007 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
12a1b225 6008 'title': 'NCS : All Releases 💿',
7666b936 6009 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
976ae3ea 6010 'modified_date': r're:\d{8}',
6011 'view_count': int,
6012 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6013 'tags': [],
6014 'channel': 'NoCopyrightSounds',
c26f9b99 6015 'availability': 'public',
7666b936 6016 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6017 'uploader': 'NoCopyrightSounds',
6018 'uploader_id': '@NoCopyrightSounds',
a6213a49 6019 },
6020 'playlist_mincount': 166,
7666b936 6021 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
a6213a49 6022 }, {
7666b936 6023 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
a6213a49 6024 'note': 'Topic, should redirect to playlist?list=UU...',
6025 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6026 'info_dict': {
6027 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6028 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6029 'tags': [],
6030 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6031 'channel': 'Royalty Free Music - Topic',
6032 'view_count': int,
6033 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
976ae3ea 6034 'modified_date': r're:\d{8}',
976ae3ea 6035 'description': '',
c26f9b99 6036 'availability': 'public',
7666b936 6037 'uploader': 'Royalty Free Music - Topic',
a6213a49 6038 },
a6213a49 6039 'playlist_mincount': 101,
7666b936 6040 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 6041 }, {
86973308
M
6042 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6043 # Treat as a general feed
a6213a49 6044 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6045 'info_dict': {
6046 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6047 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 6048 'tags': [],
a6213a49 6049 },
a6213a49 6050 'playlist_mincount': 9,
6051 }, {
6052 'note': 'Youtube music Album',
6053 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6054 'info_dict': {
6055 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6056 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 6057 'tags': [],
6058 'view_count': int,
6059 'description': '',
6060 'availability': 'unlisted',
6061 'modified_date': r're:\d{8}',
a6213a49 6062 },
6063 'playlist_count': 50,
7666b936 6064 'expected_warnings': ['YouTube Music is not directly supported'],
a6213a49 6065 }, {
6066 'note': 'unlisted single video playlist',
6067 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6068 'info_dict': {
a6213a49 6069 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6070 'title': 'yt-dlp unlisted playlist test',
976ae3ea 6071 'availability': 'unlisted',
6072 'tags': [],
12a1b225 6073 'modified_date': '20220418',
976ae3ea 6074 'channel': 'colethedj',
6075 'view_count': int,
6076 'description': '',
976ae3ea 6077 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6078 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
7666b936 6079 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6080 'uploader_id': '@colethedj1894',
6081 'uploader': 'colethedj',
a6213a49 6082 },
93e12ed7 6083 'playlist': [{
6084 'info_dict': {
6085 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6086 'id': 'BaW_jenozKc',
6087 '_type': 'url',
6088 'ie_key': 'Youtube',
6089 'duration': 10,
6090 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6091 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6092 'view_count': int,
6093 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6094 'channel': 'Philipp Hagemeister',
6095 'uploader_id': '@PhilippHagemeister',
6096 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6097 'uploader': 'Philipp Hagemeister',
add96eb9 6098 },
93e12ed7 6099 }],
a6213a49 6100 'playlist_count': 1,
93e12ed7 6101 'params': {'extract_flat': True},
a6213a49 6102 }, {
6103 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6104 'url': 'https://www.youtube.com/feed/recommended',
6105 'info_dict': {
6106 'id': 'recommended',
6107 'title': 'recommended',
6c73052c 6108 'tags': [],
a6213a49 6109 },
6110 'playlist_mincount': 50,
6111 'params': {
6112 'skip_download': True,
add96eb9 6113 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6114 },
6115 }, {
6116 'note': 'API Fallback: /videos tab, sorted by oldest first',
6117 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6118 'info_dict': {
6119 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6120 'title': 'Cody\'sLab - Videos',
6121 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
976ae3ea 6122 'channel': 'Cody\'sLab',
6123 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6124 'tags': [],
6125 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
add96eb9 6126 'channel_follower_count': int,
a6213a49 6127 },
6128 'playlist_mincount': 650,
6129 'params': {
6130 'skip_download': True,
add96eb9 6131 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6132 },
86973308 6133 'skip': 'Query for sorting no longer works',
a6213a49 6134 }, {
6135 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6136 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6137 'info_dict': {
6138 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6139 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6140 'modified_date': r're:\d{8}',
6141 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6142 'description': '',
6143 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6144 'tags': [],
6145 'channel': 'Royalty Free Music - Topic',
6146 'view_count': int,
c26f9b99 6147 'availability': 'public',
7666b936 6148 'uploader': 'Royalty Free Music - Topic',
a6213a49 6149 },
a6213a49 6150 'playlist_mincount': 101,
6151 'params': {
6152 'skip_download': True,
add96eb9 6153 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6154 },
7666b936 6155 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
7c219ea6 6156 }, {
6157 'note': 'non-standard redirect to regional channel',
6158 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
add96eb9 6159 'only_matching': True,
61d3665d 6160 }, {
6161 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6162 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6163 'info_dict': {
6164 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6165 'modified_date': '20220407',
6166 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6167 'tags': [],
61d3665d 6168 'availability': 'unlisted',
6169 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6170 'channel': 'pukkandan',
6171 'description': 'Test for collaborative playlist',
6172 'title': 'yt-dlp test - collaborative playlist',
12a1b225 6173 'view_count': int,
7666b936 6174 'uploader_url': 'https://www.youtube.com/@pukkandan',
6175 'uploader_id': '@pukkandan',
6176 'uploader': 'pukkandan',
61d3665d 6177 },
add96eb9 6178 'playlist_mincount': 2,
c26f9b99 6179 }, {
6180 'note': 'translated tab name',
6181 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6182 'info_dict': {
6183 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6184 'tags': [],
c26f9b99 6185 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 6186 'description': 'test description',
c26f9b99 6187 'title': 'cole-dlp-test-acc - 再生リスト',
c26f9b99 6188 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6189 'channel': 'cole-dlp-test-acc',
7666b936 6190 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6191 'uploader_id': '@coletdjnz',
6192 'uploader': 'cole-dlp-test-acc',
c26f9b99 6193 },
6194 'playlist_mincount': 1,
6195 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6196 'expected_warnings': ['Preferring "ja"'],
6197 }, {
6198 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6199 'note': 'preferred lang set with playlist with translated video titles',
6200 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6201 'info_dict': {
6202 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6203 'tags': [],
6204 'view_count': int,
6205 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
c26f9b99 6206 'channel': 'cole-dlp-test-acc',
6207 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6208 'description': 'test',
c26f9b99 6209 'title': 'dlp test playlist',
6210 'availability': 'public',
7666b936 6211 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6212 'uploader_id': '@coletdjnz',
6213 'uploader': 'cole-dlp-test-acc',
c26f9b99 6214 },
6215 'playlist_mincount': 1,
6216 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6217 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 6218 }, {
6219 # shorts audio pivot for 2GtVksBMYFM.
6220 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6221 'info_dict': {
6222 'id': 'sfv_audio_pivot',
6223 'title': 'sfv_audio_pivot',
6224 'tags': [],
6225 },
6226 'playlist_mincount': 50,
6227
86973308
M
6228 }, {
6229 # Channel with a real live tab (not to be mistaken with streams tab)
6230 # Do not treat like it should redirect to live stream
6231 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6232 'info_dict': {
6233 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6234 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6235 'tags': [],
6236 },
6237 'playlist_mincount': 20,
6238 }, {
6239 # Tab name is not the same as tab id
6240 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6241 'info_dict': {
6242 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6243 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6244 'tags': [],
6245 },
6246 'playlist_mincount': 8,
6247 }, {
6248 # Home tab id is literally home. Not to get mistaken with featured
6249 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6250 'info_dict': {
6251 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6252 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6253 'tags': [],
6254 },
6255 'playlist_mincount': 8,
6256 }, {
6257 # Should get three playlists for videos, shorts and streams tabs
6258 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6259 'info_dict': {
6260 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 6261 'title': 'Polka Ch. 尾丸ポルカ',
6262 'channel_follower_count': int,
6263 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6264 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
8828f457 6265 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
bd7e919a 6266 'channel': 'Polka Ch. 尾丸ポルカ',
6267 'tags': 'count:35',
7666b936 6268 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6269 'uploader': 'Polka Ch. 尾丸ポルカ',
6270 'uploader_id': '@OmaruPolka',
8828f457 6271 'channel_is_verified': True,
86973308
M
6272 },
6273 'playlist_count': 3,
6274 }, {
6275 # Shorts tab with channel with handle
7666b936 6276 # TODO: fix channel description
86973308
M
6277 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6278 'info_dict': {
6279 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6280 'title': 'Not Just Bikes - Shorts',
8828f457 6281 'tags': 'count:10',
86973308 6282 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
8828f457 6283 'description': 'md5:5e82545b3a041345927a92d0585df247',
86973308 6284 'channel_follower_count': int,
86973308 6285 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
86973308 6286 'channel': 'Not Just Bikes',
7666b936 6287 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6288 'uploader': 'Not Just Bikes',
6289 'uploader_id': '@NotJustBikes',
8828f457 6290 'channel_is_verified': True,
86973308
M
6291 },
6292 'playlist_mincount': 10,
6293 }, {
6294 # Streams tab
6295 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6296 'info_dict': {
6297 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6298 'title': '中村悠一 - Live',
6299 'tags': 'count:7',
6300 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6301 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
86973308 6302 'channel': '中村悠一',
86973308 6303 'channel_follower_count': int,
86973308 6304 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
7666b936 6305 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6306 'uploader_id': '@Yuichi-Nakamura',
6307 'uploader': '中村悠一',
86973308
M
6308 },
6309 'playlist_mincount': 60,
6310 }, {
6311 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6312 # See test_youtube_lists
6313 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6314 'only_matching': True,
6315 }, {
6316 # No uploads and no UCID given. Should fail with no uploads error
6317 # See test_youtube_lists
6318 'url': 'https://www.youtube.com/news',
add96eb9 6319 'only_matching': True,
86973308
M
6320 }, {
6321 # No videos tab but has a shorts tab
6322 'url': 'https://www.youtube.com/c/TKFShorts',
6323 'info_dict': {
6324 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6325 'title': 'Shorts Break - Shorts',
7666b936 6326 'tags': 'count:48',
86973308
M
6327 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6328 'channel': 'Shorts Break',
7666b936 6329 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
86973308 6330 'channel_follower_count': int,
86973308 6331 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
7666b936 6332 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6333 'uploader': 'Shorts Break',
6334 'uploader_id': '@ShortsBreak_Official',
86973308
M
6335 },
6336 'playlist_mincount': 30,
6337 }, {
6338 # Trending Now Tab. tab id is empty
6339 'url': 'https://www.youtube.com/feed/trending',
6340 'info_dict': {
6341 'id': 'trending',
6342 'title': 'trending - Now',
6343 'tags': [],
6344 },
6345 'playlist_mincount': 30,
6346 }, {
6347 # Trending Gaming Tab. tab id is empty
6348 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6349 'info_dict': {
6350 'id': 'trending',
6351 'title': 'trending - Gaming',
6352 'tags': [],
6353 },
6354 'playlist_mincount': 30,
4dc23a80
M
6355 }, {
6356 # Shorts url result in shorts tab
7666b936 6357 # TODO: Fix channel id extraction
4dc23a80
M
6358 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6359 'info_dict': {
6360 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6361 'title': 'cole-dlp-test-acc - Shorts',
4dc23a80 6362 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6363 'description': 'test description',
6364 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6365 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6366 'tags': [],
7666b936 6367 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6368 'uploader_id': '@coletdjnz',
4dc23a80 6369 'uploader': 'cole-dlp-test-acc',
4dc23a80
M
6370 },
6371 'playlist': [{
6372 'info_dict': {
7666b936 6373 # Channel data is not currently available for short renderers (as of 2023-03-01)
4dc23a80
M
6374 '_type': 'url',
6375 'ie_key': 'Youtube',
6376 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6377 'id': 'sSM9J5YH_60',
4dc23a80 6378 'title': 'SHORT short',
4dc23a80
M
6379 'view_count': int,
6380 'thumbnails': list,
add96eb9 6381 },
4dc23a80
M
6382 }],
6383 'params': {'extract_flat': True},
6384 }, {
6385 # Live video status should be extracted
6386 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6387 'info_dict': {
6388 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
add96eb9 6389 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6390 'tags': [],
4dc23a80
M
6391 },
6392 'playlist': [{
6393 'info_dict': {
6394 '_type': 'url',
6395 'ie_key': 'Youtube',
6396 'url': 'startswith:https://www.youtube.com/watch?v=',
6397 'id': str,
6398 'title': str,
6399 'live_status': 'is_live',
6400 'channel_id': str,
6401 'channel_url': str,
6402 'concurrent_view_count': int,
6403 'channel': str,
93e12ed7 6404 'uploader': str,
6405 'uploader_url': str,
14a14335 6406 'uploader_id': str,
8213ce28 6407 'channel_is_verified': bool, # this will keep changing
add96eb9 6408 },
4dc23a80 6409 }],
c7335551 6410 'params': {'extract_flat': True, 'playlist_items': '1'},
add96eb9 6411 'playlist_mincount': 1,
c7335551
M
6412 }, {
6413 # Channel renderer metadata. Contains number of videos on the channel
6414 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6415 'info_dict': {
6416 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6417 'title': 'cole-dlp-test-acc - Channels',
c7335551
M
6418 'channel': 'cole-dlp-test-acc',
6419 'description': 'test description',
6420 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6421 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6422 'tags': [],
7666b936 6423 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6424 'uploader_id': '@coletdjnz',
c7335551 6425 'uploader': 'cole-dlp-test-acc',
c7335551
M
6426 },
6427 'playlist': [{
6428 'info_dict': {
6429 '_type': 'url',
6430 'ie_key': 'YoutubeTab',
6431 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6432 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6433 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6434 'title': 'PewDiePie',
6435 'channel': 'PewDiePie',
6436 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6437 'thumbnails': list,
6438 'channel_follower_count': int,
7666b936 6439 'playlist_count': int,
6440 'uploader': 'PewDiePie',
6441 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6442 'uploader_id': '@PewDiePie',
8213ce28 6443 'channel_is_verified': True,
add96eb9 6444 },
c7335551
M
6445 }],
6446 'params': {'extract_flat': True},
31e18355 6447 }, {
6448 'url': 'https://www.youtube.com/@3blue1brown/about',
6449 'info_dict': {
8828f457 6450 'id': '@3blue1brown',
31e18355 6451 'tags': ['Mathematics'],
8828f457 6452 'title': '3Blue1Brown',
31e18355 6453 'channel_follower_count': int,
6454 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
31e18355 6455 'channel': '3Blue1Brown',
31e18355 6456 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
8828f457 6457 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
7666b936 6458 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6459 'uploader_id': '@3blue1brown',
6460 'uploader': '3Blue1Brown',
8213ce28 6461 'channel_is_verified': True,
31e18355 6462 },
6463 'playlist_count': 0,
447afb9e 6464 }, {
6465 # Podcasts tab, with rich entry playlistRenderers
6466 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6467 'info_dict': {
6468 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6469 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6470 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6471 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6472 'title': '99 Percent Invisible - Podcasts',
6473 'uploader': '99 Percent Invisible',
6474 'channel_follower_count': int,
6475 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6476 'tags': [],
6477 'channel': '99 Percent Invisible',
6478 'uploader_id': '@99percentinvisiblepodcast',
6479 },
8828f457 6480 'playlist_count': 0,
447afb9e 6481 }, {
6482 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6483 'url': 'https://www.youtube.com/@AHimitsu/releases',
6484 'info_dict': {
6485 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6486 'channel': 'A Himitsu',
6487 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6488 'title': 'A Himitsu - Releases',
6489 'uploader_id': '@AHimitsu',
6490 'uploader': 'A Himitsu',
6491 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
8828f457 6492 'tags': 'count:12',
447afb9e 6493 'description': 'I make music',
6494 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6495 'channel_follower_count': int,
8213ce28 6496 'channel_is_verified': True,
447afb9e 6497 },
6498 'playlist_mincount': 10,
fcbc9ed7 6499 }, {
6500 # Playlist with only shorts, shown as reel renderers
6501 # FIXME: future: YouTube currently doesn't give continuation for this,
6502 # may do in future.
6503 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6504 'info_dict': {
6505 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6506 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6507 'view_count': int,
6508 'uploader_id': '@BangyShorts',
6509 'description': '',
6510 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6511 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6512 'channel': 'Bangy Shorts',
6513 'uploader': 'Bangy Shorts',
6514 'tags': [],
6515 'availability': 'public',
8828f457 6516 'modified_date': r're:\d{8}',
fcbc9ed7 6517 'title': 'Uploads from Bangy Shorts',
6518 },
6519 'playlist_mincount': 100,
6520 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
8828f457 6521 }, {
6522 'note': 'Tags containing spaces',
6523 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6524 'playlist_count': 3,
6525 'info_dict': {
6526 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6527 'channel': 'Markiplier',
6528 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6529 'title': 'Markiplier',
6530 'channel_follower_count': int,
6531 'description': 'md5:0c010910558658824402809750dc5d97',
6532 'uploader_id': '@markiplier',
6533 'uploader_url': 'https://www.youtube.com/@markiplier',
6534 'uploader': 'Markiplier',
6535 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6536 'channel_is_verified': True,
6537 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6538 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6539 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6540 'mark fischbach'],
6541 },
a6213a49 6542 }]
6543
@classmethod
def suitable(cls, url):
    """Accept *url* only when YoutubeIE does not already claim it."""
    # URLs that the single-video extractor considers suitable are never handled here.
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6547
86973308
M
# Splits a URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional single path segment ("/name"); it is only attempted when
#          the `not_channel` group did not take part in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6549
6550 def _get_url_mobj(self, url):
6551 mobj = self._URL_RE.match(url).groupdict()
6552 mobj.update((k, '') for k, v in mobj.items() if v is None)
6553 return mobj
6554
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for the tab renderer dict *tab*.

    The id is taken, in order of preference, from the tab's endpoint URL
    (resolved against *base_url*), from its ``tabIdentifier``, and finally
    from its lower-cased title. ``tab_name`` is always the lower-cased title
    ('' when there is none).
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    # First choice: the tab segment of the endpoint URL, without its leading slash
    tab_id = None
    if tab_url:
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    # Second choice: the identifier carried by the renderer itself
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6576
6577 def _has_tab(self, tabs, tab_id):
6578 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 6579
044886c2 6580 def _empty_playlist(self, item_id, data):
6581 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
6582
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist page, or hand the URL over to another extractor.

    Depending on the URL and on the data fetched for it, the result is one of:
    a `url_result` pointing at a playlist, a video, or another tab URL;
    the single playlist built from the page's tabs; a playlist whose entries
    are per-tab playlists; an inline playlist; or an empty playlist.

    Raises:
        ExtractorError: a watch URL carries neither a video nor a playlist ID,
            an album cannot be resolved to a playlist, the requested tab does
            not exist, or the page cannot be recognized.
        UserNotLive: the /live tab was requested but a different tab was selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com; everything below works on that URL
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still has its leading slash; the tab id is the part after it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    # A channel URL without a tab is fetched through /videos unless the compat option disables it
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When _yes_playlist() answers falsy, only the video is wanted: delegate to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and the trailing part of the URL on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # extra_tabs collects the URLs of further tabs that are extracted in addition to the main page
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # The uploads playlist could not be fetched; `data` still holds the channel page
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # NOTE(review): dropping `data` presumably leaves only extra_tabs to
                # contribute entries below - confirm how the helpers treat data=None
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced or reloaded above, so the tab renderers are extracted again
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs were extracted: wrap them in one playlist of playlists
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video ID from the page data, else the one taken from the URL query
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6738
c5e8d7af 6739
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and playlist links, and forwards them to
    # YoutubeTabIE as a canonical https://www.youtube.com/playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE and URLs that carry a video ID (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-local import is kept as in the original; it is
        # presumably deliberate (keeps this method self-contained) - confirm before removing
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Reuse the original query string; a bare playlist ID has none, so build one
        query = parse_qs(url)
        if not query:
            query = {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            # Remember that the request came from a music URL
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6852
6853
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a `list=` parameter.
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6903
6904
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> onto the channel's /live URL.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6918
6919
class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand into a /user/<name> URL.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's /user/ URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6932
b05654f0 6933
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" (and its spelling variants) maps to the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6951
6952
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Keyword extractor (":ytnotif") that pages through the notification menu
    # and turns each notification into a URL entry.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for every usable notification in *response*.

        ``continuation_list[0]`` is used as an out-parameter: it is reset to
        None and then set to the last ``continuationItemRenderer`` seen.
        """
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(
            response, popup_items_path, appended_items_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` entry dict.

        Returns None when the notification points at neither a video nor a
        post with a resolvable channel id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        short_message = self._get_text(notification, 'shortMessage')
        if short_message:
            short_message = short_message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, short_message, 'video title', default=None)

        # The timestamp is only filled in when the `approximate_date` extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        ctoken_path = (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken')
        for page in itertools.count(1):
            ctoken = traverse_obj(continuation_list, ctoken_path, expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first request)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
7043
7044
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor for the "ytsearch" key. This class only declares
    # configuration and tests; it defines no methods of its own, so all
    # behavior is inherited from the two base classes.
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
b05654f0 7066
a61fd4cf 7067
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Variant of the search extractor for the "ytsearchdate" key; it differs from
    # YoutubeSearchIE only in its name, search key, description and _SEARCH_PARAMS.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'  # i.e. YoutubeSearchIE's name with ':date' appended
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 7081
c9ae7b95 7082
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query`
    # (or `q`) and the optional `sp` parameter is forwarded to the search.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by *url*; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 7152
7153
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be chosen
    # either with the `sp` query parameter or with a `#section` fragment.
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as accepted in the URL fragment) -> value of the `sp` parameter
    # NOTE(review): the 'featured playlists' value is 33 characters long, unlike the
    # other entries whose length is a multiple of 4 - confirm the trailing '=='
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search described by *url*.

        The playlist id/title is the query, followed by ' - <section>' when a
        section was selected.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Map a known `sp` value back to its section name; otherwise show the raw value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: look at the text between the first and second '#', if any
            url_parts = url.split('#')
            fragment = url_parts[1] if len(url_parts) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 7205
7206
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # The extractor name is derived from the feed name of the concrete subclass
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # Reuse the login check of YoutubeBaseInfoExtractor, which is not a base class here
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        # Every feed is delegated to YoutubeTabIE through its /feed/<name> URL
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
7225
7226
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" maps to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 7239
7240
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class, which sets True
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 7256
1ed5b5c9 7257
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the "subscriptions" feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 7269
1ed5b5c9 7270
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the "history" feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
7279
7280
class YoutubeShortsAudioPivotIE(InfoExtractor):
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True},
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the `bp` browse parameter of the sfv_audio_pivot feed for a video id

        The encoded id is substituted into a fixed byte template three times;
        the result is base64-encoded and then percent-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        # The pivot feed page is handled by the tab extractor
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7303
7304
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that carry no video id.

    Such URLs typically result from an unquoted `&` on the command line.
    This extractor never extracts anything: it always raises an expected
    ExtractorError that explains how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching _VALID_URL has no video id to extract."""
        # The example commands name this program (yt-dlp) so that the hint
        # can be followed as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
7352
7353
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com/clip/<id> URLs.

    The clip page data is used to find the underlying video id and the clip's
    start/end times; extraction of the video itself is delegated to YoutubeIE
    through a `url_transparent` result.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """
        Resolve a clip URL to its base video plus the clip's time range.

        Returns a `url_transparent` result pointing at the watch URL of the
        base video, with `section_start`/`section_end` in seconds.

        Raises ExtractorError if the video id or the clip timing data cannot
        be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # The loop command nested in the clip engagement panel carries the
        # clip boundaries (startTimeMs/endTimeMs)
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Fail with a clear message instead of a TypeError from subscripting None below
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the result uses seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 7413
7414
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Follows consent.youtube.com redirect pages to the URL in their "continue" argument
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        # When "continue" is repeated, the last occurrence is the one used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7460
7461
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose `v` value is only 1-10 characters long and
    # reports them as truncated instead of extracting anything
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)