]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
c305a25c 5import datetime as dt
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8828f457 14import shlex
46383212 15import sys
f8271158 16import threading
8a784c74 17import time
e0df6211 18import traceback
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
3d2623a8 25from ..networking.exceptions import HTTPError, network_exceptions
4bb4a188 26from ..utils import (
f8271158 27 NO_DEFAULT,
28 ExtractorError,
4d37720a 29 LazyList,
693f0600 30 UserNotLive,
720c3099 31 bug_reports_message,
82d02080 32 classproperty,
c5e8d7af 33 clean_html,
d92f5d5a 34 datetime_from_str,
11f9be09 35 dict_get,
a25a4243 36 filesize_from_tbr,
7a32c70d 37 filter_dict,
2d30521a 38 float_or_none,
11f9be09 39 format_field,
ff91cf74 40 get_first,
dd27fd17 41 int_or_none,
641ad5d8 42 is_html,
34921b43 43 join_nonempty,
48416bc4 44 js_to_json,
94278f72 45 mimetype2ext,
11f9be09 46 orderedSet,
6310acf5 47 parse_codecs,
49bd8c66 48 parse_count,
7c80519c 49 parse_duration,
7ea65411 50 parse_iso8601,
4dfbf869 51 parse_qs,
dca3ff4a 52 qualities,
3995d37d 53 remove_start,
cf7e015f 54 smuggle_url,
dbdaaa23 55 str_or_none,
c93d53f5 56 str_to_int,
f3aa3c3f 57 strftime_or_none,
7c365c21 58 traverse_obj,
a25a4243 59 try_call,
556dbe7f 60 try_get,
c5e8d7af
PH
61 unescapeHTML,
62 unified_strdate,
f0d785d3 63 unified_timestamp,
cf7e015f 64 unsmuggle_url,
8bdd16b4 65 update_url_query,
21c340b8 66 url_or_none,
fe93e2c4 67 urljoin,
7c365c21 68 variadic,
c5e8d7af
PH
69)
70
c795c39f 71STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
962ffcf8 72# any clients starting with _ cannot be explicitly requested by the user
000c15a4 73INNERTUBE_CLIENTS = {
74 'web': {
75 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
76 'INNERTUBE_CONTEXT': {
77 'client': {
78 'clientName': 'WEB',
a0c830f4 79 'clientVersion': '2.20220801.00.00',
add96eb9 80 },
000c15a4 81 },
add96eb9 82 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
000c15a4 83 },
84 'web_embedded': {
85 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
86 'INNERTUBE_CONTEXT': {
87 'client': {
88 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 89 'clientVersion': '1.20220731.00.00',
000c15a4 90 },
91 },
add96eb9 92 'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
000c15a4 93 },
94 'web_music': {
95 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
96 'INNERTUBE_HOST': 'music.youtube.com',
97 'INNERTUBE_CONTEXT': {
98 'client': {
99 'clientName': 'WEB_REMIX',
a0c830f4 100 'clientVersion': '1.20220727.01.00',
add96eb9 101 },
000c15a4 102 },
103 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
104 },
e7e94f2a 105 'web_creator': {
18c7683d 106 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
107 'INNERTUBE_CONTEXT': {
108 'client': {
109 'clientName': 'WEB_CREATOR',
a0c830f4 110 'clientVersion': '1.20220726.00.00',
add96eb9 111 },
e7e94f2a
D
112 },
113 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
114 },
000c15a4 115 'android': {
18c7683d 116 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 117 'INNERTUBE_CONTEXT': {
118 'client': {
119 'clientName': 'ANDROID',
7aad0654 120 'clientVersion': '19.09.37',
50ac0e54 121 'androidSdkVersion': 30,
add96eb9 122 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
123 },
000c15a4 124 },
125 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
add96eb9 126 'REQUIRE_JS_PLAYER': False,
000c15a4 127 },
128 'android_embedded': {
18c7683d 129 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 130 'INNERTUBE_CONTEXT': {
131 'client': {
132 'clientName': 'ANDROID_EMBEDDED_PLAYER',
7aad0654 133 'clientVersion': '19.09.37',
50ac0e54 134 'androidSdkVersion': 30,
add96eb9 135 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
000c15a4 136 },
137 },
b6de707d 138 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
add96eb9 139 'REQUIRE_JS_PLAYER': False,
000c15a4 140 },
141 'android_music': {
18c7683d 142 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
7aad0654 146 'clientVersion': '6.42.52',
50ac0e54 147 'androidSdkVersion': 30,
add96eb9 148 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
149 },
000c15a4 150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
add96eb9 152 'REQUIRE_JS_PLAYER': False,
000c15a4 153 },
e7e94f2a 154 'android_creator': {
18c7683d 155 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
156 'INNERTUBE_CONTEXT': {
157 'client': {
158 'clientName': 'ANDROID_CREATOR',
50ac0e54 159 'clientVersion': '22.30.100',
160 'androidSdkVersion': 30,
add96eb9 161 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
e7e94f2a
D
162 },
163 },
b6de707d 164 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
add96eb9 165 'REQUIRE_JS_PLAYER': False,
e7e94f2a 166 },
18c7683d 167 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
168 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 169 'ios': {
18c7683d 170 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 171 'INNERTUBE_CONTEXT': {
172 'client': {
173 'clientName': 'IOS',
7aad0654 174 'clientVersion': '19.09.3',
18c7683d 175 'deviceModel': 'iPhone14,3',
add96eb9 176 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
177 },
000c15a4 178 },
b6de707d 179 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
add96eb9 180 'REQUIRE_JS_PLAYER': False,
000c15a4 181 },
182 'ios_embedded': {
000c15a4 183 'INNERTUBE_CONTEXT': {
184 'client': {
185 'clientName': 'IOS_MESSAGES_EXTENSION',
7aad0654 186 'clientVersion': '19.09.3',
18c7683d 187 'deviceModel': 'iPhone14,3',
add96eb9 188 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
000c15a4 189 },
190 },
b6de707d 191 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
add96eb9 192 'REQUIRE_JS_PLAYER': False,
000c15a4 193 },
194 'ios_music': {
18c7683d 195 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 196 'INNERTUBE_CONTEXT': {
197 'client': {
198 'clientName': 'IOS_MUSIC',
7aad0654 199 'clientVersion': '6.33.3',
224b5a35 200 'deviceModel': 'iPhone14,3',
add96eb9 201 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
000c15a4 202 },
203 },
b6de707d 204 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
add96eb9 205 'REQUIRE_JS_PLAYER': False,
000c15a4 206 },
e7e94f2a
D
207 'ios_creator': {
208 'INNERTUBE_CONTEXT': {
209 'client': {
210 'clientName': 'IOS_CREATOR',
224b5a35
SF
211 'clientVersion': '22.33.101',
212 'deviceModel': 'iPhone14,3',
add96eb9 213 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
e7e94f2a
D
214 },
215 },
b6de707d 216 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
add96eb9 217 'REQUIRE_JS_PLAYER': False,
e7e94f2a 218 },
3619f78d 219 # mweb has 'ultralow' formats
220 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 221 'mweb': {
18c7683d 222 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 223 'INNERTUBE_CONTEXT': {
224 'client': {
225 'clientName': 'MWEB',
a0c830f4 226 'clientVersion': '2.20220801.00.00',
add96eb9 227 },
000c15a4 228 },
add96eb9 229 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
e7870111
D
230 },
231 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
232 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
233 'tv_embedded': {
234 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
235 'INNERTUBE_CONTEXT': {
236 'client': {
237 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
238 'clientVersion': '2.0',
239 },
240 },
add96eb9 241 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
e7870111 242 },
cf212d0a
H
243 # This client has pre-merged video+audio 720p/1080p streams
244 'mediaconnect': {
245 'INNERTUBE_CONTEXT': {
246 'client': {
247 'clientName': 'MEDIA_CONNECT_FRONTEND',
248 'clientVersion': '0.1',
249 },
250 },
add96eb9 251 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
cf212d0a 252 },
000c15a4 253}
254
255
e7870111
D
256def _split_innertube_client(client_name):
257 variant, *base = client_name.rsplit('.', 1)
258 if base:
259 return variant, base[0], variant
260 base, *variant = client_name.split('_', 1)
261 return client_name, base, variant[0] if variant else None
262
263
c795c39f
L
def short_client_name(client_name):
    """Return an upper-cased abbreviation of an innertube client name.

    The full name (first element of ``_split_innertube_client``) has
    ``'embedscreen'`` rewritten to ``'e_s'`` and is split on underscores.
    The first piece is truncated to four characters; each remaining piece
    contributes its first character.  The two parts are combined with
    ``join_nonempty`` before upper-casing.
    """
    full_name = _split_innertube_client(client_name)[0]
    pieces = full_name.replace('embedscreen', 'e_s').split('_')
    head = pieces[0][:4]
    initials = ''.join(piece[0] for piece in pieces[1:])
    return join_nonempty(head, initials).upper()
267
268
def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each client this fills in a default API key, host, ``REQUIRE_JS_PLAYER``
    flag and ``hl`` language, then assigns a ``priority`` of ten times the rank
    that ``qualities`` gives its base client, minus a variant-dependent penalty:

    * no variant: priority is left as is, and an additional
      ``<client>_embedscreen`` entry is registered (a deep copy with
      ``clientScreen='EMBED'``, third-party context and priority - 3);
    * ``embedded`` variant: third-party context is attached, priority - 2;
    * any other variant: priority - 3.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    base_priority = qualities(base_clients[::-1])
    fallbacks = {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_HOST': 'www.youtube.com',
        'REQUIRE_JS_PLAYER': True,
    }

    # Iterate over a snapshot: embedscreen entries are added during the loop
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in fallbacks.items():
            ytcfg.setdefault(key, fallback)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * base_priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
295
296
297build_innertube_clients()
298
299
@enum.unique
class BadgeType(enum.Enum):
    """Kinds of badge distinguished by the extractors in this module."""

    # Availability of the item
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
c26f9b99 308
309
de7f3446 310class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 311 """Provide base functions for Youtube extractors"""
e00eb564 312
3462ffa8 313 _RESERVED_NAMES = (
08e29b9f 314 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 315 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 316 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 317 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 318
3619f78d 319 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
320
52efa4b3 321 # _NETRC_MACHINE = 'youtube'
3619f78d 322
b2e8bc1b
JMF
323 # If True it will raise an error if no login info is provided
324 _LOGIN_REQUIRED = False
325
d9190e44
RH
326 _INVIDIOUS_SITES = (
327 # invidious-redirect websites
328 r'(?:www\.)?redirect\.invidious\.io',
329 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 330 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
331 r'(?:www\.)?invidious\.pussthecat\.org',
332 r'(?:www\.)?invidious\.zee\.li',
333 r'(?:www\.)?invidious\.ethibox\.fr',
05799a48
RH
334 r'(?:www\.)?iv\.ggtyler\.dev',
335 r'(?:www\.)?inv\.vern\.i2p',
336 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
337 r'(?:www\.)?inv\.riverside\.rocks',
338 r'(?:www\.)?invidious\.silur\.me',
339 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
340 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
341 r'(?:www\.)?invidious\.slipfox\.xyz',
342 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
343 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
344 r'(?:www\.)?invidious\.tiekoetter\.com',
345 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
346 r'(?:www\.)?invidious\.nerdvpn\.de',
347 r'(?:www\.)?invidious\.weblibre\.org',
348 r'(?:www\.)?inv\.odyssey346\.dev',
349 r'(?:www\.)?invidious\.dhusch\.de',
350 r'(?:www\.)?iv\.melmac\.space',
351 r'(?:www\.)?watch\.thekitty\.zone',
352 r'(?:www\.)?invidious\.privacydev\.net',
353 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
354 r'(?:www\.)?invidious\.drivet\.xyz',
355 r'(?:www\.)?vid\.priv\.au',
356 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
357 r'(?:www\.)?inv\.vern\.cc',
358 r'(?:www\.)?invidious\.esmailelbob\.xyz',
359 r'(?:www\.)?invidious\.sethforprivacy\.com',
360 r'(?:www\.)?yt\.oelrichsgarcia\.de',
361 r'(?:www\.)?yt\.artemislena\.eu',
362 r'(?:www\.)?invidious\.flokinet\.to',
363 r'(?:www\.)?invidious\.baczek\.me',
364 r'(?:www\.)?y\.com\.sb',
365 r'(?:www\.)?invidious\.epicsite\.xyz',
366 r'(?:www\.)?invidious\.lidarshield\.cloud',
367 r'(?:www\.)?yt\.funami\.tech',
d9190e44 368 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
369 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
370 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
371 # youtube-dl invidious instances list
372 r'(?:(?:www|no)\.)?invidiou\.sh',
373 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
374 r'(?:www\.)?invidious\.kabi\.tk',
375 r'(?:www\.)?invidious\.mastodon\.host',
376 r'(?:www\.)?invidious\.zapashcanon\.fr',
377 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
378 r'(?:www\.)?invidious\.tinfoil-hat\.net',
379 r'(?:www\.)?invidious\.himiko\.cloud',
380 r'(?:www\.)?invidious\.reallyancient\.tech',
381 r'(?:www\.)?invidious\.tube',
382 r'(?:www\.)?invidiou\.site',
383 r'(?:www\.)?invidious\.site',
384 r'(?:www\.)?invidious\.xyz',
385 r'(?:www\.)?invidious\.nixnet\.xyz',
386 r'(?:www\.)?invidious\.048596\.xyz',
387 r'(?:www\.)?invidious\.drycat\.fr',
388 r'(?:www\.)?inv\.skyn3t\.in',
389 r'(?:www\.)?tube\.poal\.co',
390 r'(?:www\.)?tube\.connect\.cafe',
391 r'(?:www\.)?vid\.wxzm\.sx',
392 r'(?:www\.)?vid\.mint\.lgbt',
393 r'(?:www\.)?vid\.puffyan\.us',
394 r'(?:www\.)?yewtu\.be',
395 r'(?:www\.)?yt\.elukerio\.org',
396 r'(?:www\.)?yt\.lelux\.fi',
397 r'(?:www\.)?invidious\.ggc-project\.de',
398 r'(?:www\.)?yt\.maisputain\.ovh',
399 r'(?:www\.)?ytprivate\.com',
400 r'(?:www\.)?invidious\.13ad\.de',
401 r'(?:www\.)?invidious\.toot\.koeln',
402 r'(?:www\.)?invidious\.fdn\.fr',
403 r'(?:www\.)?watch\.nettohikari\.com',
404 r'(?:www\.)?invidious\.namazso\.eu',
405 r'(?:www\.)?invidious\.silkky\.cloud',
406 r'(?:www\.)?invidious\.exonip\.de',
407 r'(?:www\.)?invidious\.riverside\.rocks',
408 r'(?:www\.)?invidious\.blamefran\.net',
409 r'(?:www\.)?invidious\.moomoo\.de',
410 r'(?:www\.)?ytb\.trom\.tf',
411 r'(?:www\.)?yt\.cyberhost\.uk',
412 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
413 r'(?:www\.)?qklhadlycap4cnod\.onion',
414 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
415 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
416 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
417 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
418 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
419 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
420 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
421 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
422 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
423 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
424 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
425 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 426 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 427 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 428 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
429 r'(?:www\.)?watch\.whatever\.social',
430 r'(?:www\.)?piped\.garudalinux\.org',
431 r'(?:www\.)?piped\.rivo\.lol',
432 r'(?:www\.)?piped-libre\.kavin\.rocks',
433 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 434 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
435 r'(?:www\.)?il\.ax',
436 r'(?:www\.)?piped\.esmailelbob\.xyz',
437 r'(?:www\.)?piped\.projectsegfau\.lt',
438 r'(?:www\.)?piped\.privacydev\.net',
439 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
440 r'(?:www\.)?piped\.smnz\.de',
441 r'(?:www\.)?piped\.adminforge\.de',
442 r'(?:www\.)?watch\.whatevertinfoil\.de',
443 r'(?:www\.)?piped\.qdi\.fi',
6a9c7a2b 444 r'(?:(?:www|cf)\.)?piped\.video',
bc87dac7 445 r'(?:www\.)?piped\.aeong\.one',
05799a48
RH
446 r'(?:www\.)?piped\.moomoo\.me',
447 r'(?:www\.)?piped\.chauvet\.pro',
448 r'(?:www\.)?watch\.leptons\.xyz',
449 r'(?:www\.)?pd\.vern\.cc',
450 r'(?:www\.)?piped\.hostux\.net',
451 r'(?:www\.)?piped\.lunar\.icu',
78a78fa7
BG
452 # Hyperpipe instances from https://hyperpipe.codeberg.page/
453 r'(?:www\.)?hyperpipe\.surge\.sh',
454 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
455 r'(?:www\.)?listen\.whatever\.social',
456 r'(?:www\.)?music\.adminforge\.de',
d9190e44
RH
457 )
458
c26f9b99 459 # extracted from account/account_menu ep
460 # XXX: These are the supported YouTube UI and API languages,
461 # which is slightly different from languages supported for translation in YouTube studio
462 _SUPPORTED_LANG_CODES = [
463 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
464 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
465 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
466 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
467 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
add96eb9 468 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
c26f9b99 469 ]
470
a057779d 471 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
472
7666b936 473 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
474 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
475
def ucid_or_none(self, ucid):
    """Return ``ucid`` when it matches ``_YT_CHANNEL_UCID_RE`` anchored at both ends, else ``None``."""
    anchored_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_re, ucid, 'UC-id', default=None)
478
def handle_or_none(self, handle):
    """Return ``handle`` when it matches ``_YT_HANDLE_RE`` anchored at both ends, else ``None``."""
    anchored_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_re, handle, '@-handle', default=None)
481
def handle_from_url(self, url):
    """Return the ``@handle`` that directly follows the (optional) youtube.com origin in ``url``, or ``None``."""
    origin_re = r'(?:https?://(?:www\.)?youtube\.com)?'
    handle_url_re = rf'^{origin_re}/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
485
def ucid_from_url(self, url):
    """Return the UC channel id that directly follows the (optional) youtube.com origin in ``url``, or ``None``."""
    origin_re = r'(?:https?://(?:www\.)?youtube\.com)?'
    ucid_url_re = rf'^{origin_re}/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
489
@functools.cached_property
def _preferred_lang(self):
    """
    Language code from the ``lang`` extractor argument (looked up under the 'Youtube' key).

    Returns None when no language is configured.  Raises an expected
    ExtractorError when the configured code is not in ``_SUPPORTED_LANG_CODES``
    (the comparison is case-sensitive), and emits a warning for any language
    other than 'en'.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
507
cce889b9 508 def _initialize_consent(self):
509 cookies = self._get_cookies('https://www.youtube.com/')
510 if cookies.get('__Secure-3PSID'):
511 return
378ae9f9 512 socs = cookies.get('SOCS')
513 if socs and not socs.value.startswith('CAA'): # not consented
514 return
515 self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
8d81f3e3 516
f3aa3c3f 517 def _initialize_pref(self):
518 cookies = self._get_cookies('https://www.youtube.com/')
519 pref_cookie = cookies.get('PREF')
520 pref = {}
521 if pref_cookie:
522 try:
14f25df2 523 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 524 except ValueError:
525 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
c26f9b99 526 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
14f25df2 527 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 528
b2e8bc1b 529 def _real_initialize(self):
f3aa3c3f 530 self._initialize_pref()
cce889b9 531 self._initialize_consent()
a25bca9f 532 self._check_login_required()
533
534 def _check_login_required(self):
24146491 535 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 536 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 537
b7c47b74 538 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
539 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 540
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the ``INNERTUBE_CLIENTS`` entry for ``client``.

    Raises KeyError for a client name that is not in ``INNERTUBE_CLIENTS``.
    """
    default_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_config)
109dd3b2 543
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for ``client`` in ``INNERTUBE_CLIENTS``."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 546
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is used whenever the value obtained from ``ytcfg`` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
551
000c15a4 552 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 553 return self._ytcfg_get_safe(
554 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 555 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 556
000c15a4 557 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 558 return self._ytcfg_get_safe(
559 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 560 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 561
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the ``innertube_host`` extractor argument, then
    ``req_api_hostname``, then the host of ``default_client`` ('web' when
    no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
565
000c15a4 566 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 567 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 568
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict from ``ytcfg`` or the default config.

    The context's ``client`` dict (when it is a dict) is updated in place so
    that language and timezone are fixed for extraction.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(
        hl=self._preferred_lang or 'en',
        timeZone='UTC',
        utcOffsetMinutes=0)
    return context
576
cf87314d 577 _SAPISID = None
578
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization value for ``origin``.

    On first use the SAPISID value is read from the youtube.com cookies and
    cached on ``self._SAPISID`` (``False`` when unavailable).  Returns None
    when there is no SAPISID to sign with.
    """
    now = round(time.time())

    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'
a5c56234
M
603
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    ``context`` defaults to the context of ``default_client``.  ``headers``
    are layered on top of the generated API headers.  The API key comes from
    the ``innertube_key`` extractor argument, then ``api_key``, then the
    default config.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 621
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON that follows ``_YT_INITIAL_DATA_RE`` and return it."""
    start_re = self._YT_INITIAL_DATA_RE
    return self._search_json(start_re, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 624
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` among ``data`` that converts to an
    int, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
635
636 # Deprecated?
637 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 638 if ytcfg:
14f25df2 639 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
640 if token:
641 return token
99e9e001 642 if webpage:
643 return self._search_regex(
644 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
645 'identity token', default=None, fatal=False)
a1c5d2ca
M
646
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        channel_syncid, *remainder = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if remainder and remainder[0]:
            return channel_syncid
    return None
a1c5d2ca 665
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
ac56cf38 675
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated, False otherwise (cached)."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False
679
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call in ``webpage``.

    Returns an empty dict when ``webpage`` is empty, no such call is found,
    or the parsed result is falsy.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
687
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values looked up in
    ``ytcfg``; client name, version and user agent fall back to the config of
    ``default_client``.  ``X-Goog-AuthUser`` is added when an account syncid
    or a session index is known, and ``Authorization``/``X-Origin`` are added
    when a SAPISIDHASH can be generated.  The result is passed through
    ``filter_dict`` before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    headers = {
        'X-YouTube-Client-Name': str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A syncid without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)
29f7c58a 713
a25bca9f 714 def _download_ytcfg(self, client, video_id):
715 url = {
716 'web': 'https://www.youtube.com',
717 'web_music': 'https://music.youtube.com',
add96eb9 718 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
a25bca9f 719 }.get(client)
720 if not url:
721 return {}
722 webpage = self._download_webpage(
723 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
724 return self.extract_ytcfg(video_id, webpage) or {}
725
2d6659b9 726 @staticmethod
727 def _build_api_continuation_query(continuation, ctp=None):
728 query = {
add96eb9 729 'continuation': continuation,
2d6659b9 730 }
731 # TODO: Inconsistency with clickTrackingParams.
732 # Currently we have a fixed ctp contained within context (from ytcfg)
733 # and a ctp in root query for continuation.
734 if ctp:
735 query['clickTracking'] = {'clickTrackingParams': ctp}
736 return query
737
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData``/``reloadContinuationData``.

    Returns None when ``renderer`` carries neither, or when the data has no
    ``continuation`` token.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 750
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    The token is read from continuationCommand.token; returns None if the
    endpoint is not a dict or has no string token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 760
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query for a renderer.

    The renderer-level continuation data is preferred; otherwise the first
    usable continuationItemRenderer endpoint under 'contents', 'items' or 'rows' is used.
    """
    item_renderer_endpoints = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, item_renderer_endpoints,
        get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 771
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for every alert found in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any extractable text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer values come straight from the API response, so guard them
            # the same way as the outer entries instead of failing on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
784
c0ac49bc 785 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 786 errors, warnings = [], []
109dd3b2 787 for alert_type, alert_message in alerts:
641ad5d8 788 if alert_type.lower() == 'error' and fatal:
109dd3b2 789 errors.append([alert_type, alert_message])
a057779d 790 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 791 warnings.append([alert_type, alert_message])
792
793 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 794 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 795 if errors:
add96eb9 796 raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
109dd3b2 797
798 def _extract_and_report_alerts(self, data, *args, **kwargs):
799 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
800
def _extract_badges(self, badge_list: list):
    """
    Map a list of badge renderers to the BadgeType's that are recognised.
    Badges are identified by icon type, then by style, and finally by their (English) label.
    @returns [{'type': BadgeType}]
    """
    types_by_icon = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    types_by_style = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    types_by_label = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        known_type = types_by_icon.get(traverse_obj(renderer, ('icon', 'iconType'), expected_type=str))
        if not known_type:
            known_type = types_by_style.get(traverse_obj(renderer, 'style'))
        if known_type:
            found.append({'type': known_type})
            continue

        # fallback, won't work in some languages
        label_text = traverse_obj(
            renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        # The first label (in declaration order) contained in the text wins
        label_type = next(
            (candidate for needle, candidate in types_by_label.items() if needle in label_text), None)
        if label_type is not None:
            found.append({'type': label_type})

    return found
852
@staticmethod
def _has_badge(badges, badge_type):
    """Return whether any entry of `badges` has the given `badge_type` as its 'type'"""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
856
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths.

    Each candidate is read from its 'simpleText', or else by joining the 'text'
    of its 'runs' (a bare list is treated as the runs themselves).
    @param path_list: paths to try in order; with no paths, `data` itself is used
    @param max_runs:  if truthy, only this many leading runs are joined
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths give a list of results; wrap single results
            is_branching = any(
                step is ... or isinstance(step, (list, tuple))
                for step in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not runs:
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
47193e02 878
def _get_count(self, data, *path_list):
    """
    Read the text at the given paths and parse it as a count.

    If parse_count cannot handle the text, the leading run of digits and commas
    (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)
886
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Collect thumbnail dicts ('url', 'height', 'width') from thumbnail containers
    @param path_list: path list to level that contains the `final_key` key
    @param final_key: key holding the list of thumbnails at each of those levels
    """
    results = []
    for base_path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(base_path), final_key, ...)):
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                results.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return results
910
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time expression in a string and convert it using datetime_from_str
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
    Returns None if nothing matches or the matched expression cannot be converted.
    """

    # XXX: this could be moved to a general function in utils/_utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not match:
        return None

    anchor = match.group('start')
    if anchor:
        return datetime_from_str(anchor)

    offset_expr = f'now-{match.group("time")}{match.group("unit")}'
    try:
        return datetime_from_str(offset_expr)
    except ValueError:
        return None
933
def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    Relative times are tried first, then unified_timestamp on the whole text and
    finally on a date-like part picked out of the lowercased text.
    A warning is emitted once if nothing works and the preferred language is unset or 'en'.
    """
    if not text:
        return

    relative = self.extract_relative_time(text)
    if isinstance(relative, dt.datetime):
        return calendar.timegm(relative.timetuple())

    timestamp = unified_timestamp(text)
    if not timestamp:
        date_part = self._search_regex(
            (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
            text.lower(), 'time text', default=None)
        timestamp = unified_timestamp(date_part)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
f3aa3c3f 953
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the response.

    Two retry managers are driven by hand: one for network/server errors and one
    for responses that lack the expected data ("incomplete data").

    @param item_id:         passed to the API call as `video_id` and used when parsing error bodies
    @param check_get_keys:  key(s)/path(s) passed to traverse_obj; a falsy result
                            marks the response as incomplete
    @param fatal:           forwarded to _error_or_warning for errors that are not retried
    @returns                the response, None when incomplete-data retries run out
                            without raising, or the result of _error_or_warning
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            # Errors not caused by the network are never retried
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            # Network errors other than HTTP errors are always retried
            elif not isinstance(e.cause, HTTPError):
                main_rm.error = e
                next(main_retries)
                continue

            # Peek at the error body: if it is not HTML, try to read a JSON
            # error message from it and report that as a (non-fatal) alert
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            # A default is given so that running out of retries ends in `return None` here
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
109dd3b2 1020
9297939e 1021 @staticmethod
1022 def is_music_url(url):
5b28cef7 1023 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
9297939e 1024
def _extract_video(self, renderer):
    """
    Build a 'url' type result (handled by YoutubeIE) from a video renderer.

    Most fields are read from the renderer itself; the header of the reel (shorts)
    player overlay is used as a fallback for the title, channel and time text.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    raw_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not raw_channel_id:
        raw_channel_id = traverse_obj(
            reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(raw_channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    view_count_field = 'view_count'
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'

    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The time text is only turned into a timestamp when explicitly requested
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1119
0c148415 1120
360e1ca5 1121class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1122 IE_DESC = 'YouTube'
add96eb9 1123 _VALID_URL = r'''(?x)^
c5e8d7af 1124 (
edb53e2d 1125 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1126 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1127 (?:www\.)?deturl\.com/www\.youtube\.com|
1128 (?:www\.)?pwnyoutube\.com|
1129 (?:www\.)?hooktube\.com|
1130 (?:www\.)?yourepeat\.com|
1131 tube\.majestyc\.net|
add96eb9 1132 {invidious}|
bc2ca1bb 1133 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1134 (?:.*?\#/)? # handle anchor (#/) redirect urls
1135 (?: # the various things that can precede the ID:
dad2210c 1136 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1137 |(?: # or the v= param in all its forms
f7000f3a 1138 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1139 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1140 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1141 v=
1142 )
f4b05232 1143 ))
cbaed4bb
S
1144 |(?:
1145 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1146 vid\.plus| # or vid.plus/xxxx
1147 zwearz\.com/watch| # or zwearz.com/watch/xxxx
add96eb9 1148 {invidious}
cbaed4bb 1149 )/
edb53e2d 1150 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1151 )
c5e8d7af 1152 )? # all until now is optional -> you can pass the naked ID
add96eb9 1153 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
c5e8d7af 1154 (?(1).+)? # if we found the ID, everything can follow
add96eb9 1155 (?:\#|$)'''.format(
1156 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1157 )
7c6eb424 1158 _EMBED_REGEX = [
1159 r'''(?x)
1160 (?:
0ca0f881 1161 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1162 data-video-url=|
1163 <embed[^>]+?src=|
1164 embedSWF\(?:\s*|
1165 <object[^>]+data=|
1166 new\s+SWFObject\(
1167 )
1168 (["\'])
1169 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1170 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1171 \1''',
1172 # https://wordpress.org/plugins/lazy-load-for-videos/
1173 r'''(?xs)
1174 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1175 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1176 ]
6368e2e6 1177 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1178
e40c758c 1179 _PLAYER_INFO_RE = (
cc2db878 1180 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1181 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1182 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1183 )
85ec2a33 1184 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
c2d3cb4c 1185 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1186 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1187 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1188 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1189 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1190 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1191 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1192 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1193 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1194 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1195 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1196 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1197 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1198 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1199 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1200 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1201 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1202 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1203
1204
1205 # 3D videos
c2d3cb4c 1206 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1207 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1208 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1209 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1210 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1211 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1212 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1213
96fb5605 1214 # Apple HTTP Live Streaming
11f12195 1215 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1216 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1217 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1218 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1219 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1220 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1221 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1222 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1223
1224 # DASH mp4 video
d23028a8
S
1225 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1226 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1227 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1228 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1229 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1230 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1231 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1232 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1233 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1234 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1235 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1236 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1237
f6f1fc92 1238 # Dash mp4 audio
d23028a8
S
1239 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1240 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1241 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1242 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1243 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1244 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1245 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1246
1247 # Dash webm
d23028a8
S
1248 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1249 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1250 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1251 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1252 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1253 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1254 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1255 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1256 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1257 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1258 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1260 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1261 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1262 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1263 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1264 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1265 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1266 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1267 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1268 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1269 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1270
1271 # Dash webm audio
d23028a8
S
1272 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1273 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1274
0857baad 1275 # Dash webm audio with opus inside
d23028a8
S
1276 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1277 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1278 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1279
ce6b9a2d
PH
1280 # RTMP (unnamed)
1281 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1282
1283 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1284 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1285 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1286 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1287 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1288 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1289 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1290 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1291 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1292 }
29f7c58a 1293 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1294
fd5c4aab
S
1295 _GEO_BYPASS = False
1296
78caa52a 1297 IE_NAME = 'youtube'
2eb88d95
PH
1298 _TESTS = [
1299 {
2d3d2997 1300 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1301 'info_dict': {
1302 'id': 'BaW_jenozKc',
1303 'ext': 'mp4',
3867038a 1304 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
ff9f925b 1305 'channel': 'Philipp Hagemeister',
dd4c4492
S
1306 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1307 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1308 'upload_date': '20121002',
ff9f925b 1309 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1310 'categories': ['Science & Technology'],
3867038a 1311 'tags': ['youtube-dl'],
556dbe7f 1312 'duration': 10,
dbdaaa23 1313 'view_count': int,
3e7c1224 1314 'like_count': int,
ff9f925b 1315 'availability': 'public',
1316 'playable_in_embed': True,
1317 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1318 'live_status': 'not_live',
1319 'age_limit': 0,
7c80519c 1320 'start_time': 1,
297a564b 1321 'end_time': 9,
12a1b225 1322 'comment_count': int,
7666b936 1323 'channel_follower_count': int,
1324 'uploader': 'Philipp Hagemeister',
1325 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1326 'uploader_id': '@PhilippHagemeister',
5caf30db 1327 'heatmap': 'count:100',
96a134de 1328 'timestamp': 1349198244,
add96eb9 1329 },
0e853ca4 1330 },
fccd3771 1331 {
4bc3a23e
PH
1332 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1333 'note': 'Embed-only video (#1746)',
1334 'info_dict': {
1335 'id': 'yZIXLfi8CZQ',
1336 'ext': 'mp4',
1337 'upload_date': '20120608',
1338 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1339 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
94bfcd23 1340 'age_limit': 18,
545cc85d 1341 },
1342 'skip': 'Private video',
fccd3771 1343 },
11b56058 1344 {
8bdd16b4 1345 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1346 'note': 'Use the first video ID in the URL',
1347 'info_dict': {
1348 'id': 'BaW_jenozKc',
1349 'ext': 'mp4',
3867038a 1350 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976ae3ea 1351 'channel': 'Philipp Hagemeister',
1352 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1353 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1354 'upload_date': '20121002',
976ae3ea 1355 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1356 'categories': ['Science & Technology'],
3867038a 1357 'tags': ['youtube-dl'],
556dbe7f 1358 'duration': 10,
dbdaaa23 1359 'view_count': int,
11b56058 1360 'like_count': int,
976ae3ea 1361 'availability': 'public',
1362 'playable_in_embed': True,
1363 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1364 'live_status': 'not_live',
1365 'age_limit': 0,
12a1b225 1366 'comment_count': int,
7666b936 1367 'channel_follower_count': int,
1368 'uploader': 'Philipp Hagemeister',
1369 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1370 'uploader_id': '@PhilippHagemeister',
14a14335 1371 'heatmap': 'count:100',
96a134de 1372 'timestamp': 1349198244,
34a7de29
S
1373 },
1374 'params': {
1375 'skip_download': True,
1376 },
11b56058 1377 },
dd27fd17 1378 {
2d3d2997 1379 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1380 'note': '256k DASH audio (format 141) via DASH manifest',
1381 'info_dict': {
1382 'id': 'a9LDPn-MO4I',
1383 'ext': 'm4a',
1384 'upload_date': '20121002',
4bc3a23e 1385 'description': '',
add96eb9 1386 'title': 'UHDTV TEST 8K VIDEO.mp4',
4919603f 1387 },
4bc3a23e
PH
1388 'params': {
1389 'youtube_include_dash_manifest': True,
1390 'format': '141',
4919603f 1391 },
de3c7fe0 1392 'skip': 'format 141 not served anymore',
dd27fd17 1393 },
8bdd16b4 1394 # DASH manifest with encrypted signature
1395 {
1396 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1397 'info_dict': {
1398 'id': 'IB3lcPjvWLA',
1399 'ext': 'm4a',
1400 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1401 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1402 'duration': 244,
8bdd16b4 1403 'upload_date': '20131011',
cc2db878 1404 'abr': 129.495,
976ae3ea 1405 'like_count': int,
1406 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1407 'playable_in_embed': True,
1408 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1409 'view_count': int,
1410 'track': 'The Spark',
1411 'live_status': 'not_live',
1412 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1413 'channel': 'Afrojack',
976ae3ea 1414 'tags': 'count:19',
1415 'availability': 'public',
1416 'categories': ['Music'],
1417 'age_limit': 0,
1418 'alt_title': 'The Spark',
7666b936 1419 'channel_follower_count': int,
1420 'uploader': 'Afrojack',
1421 'uploader_url': 'https://www.youtube.com/@Afrojack',
1422 'uploader_id': '@Afrojack',
8bdd16b4 1423 },
1424 'params': {
1425 'youtube_include_dash_manifest': True,
1426 'format': '141/bestaudio[ext=m4a]',
1427 },
1428 },
65c2fde2 1429 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1430 {
65c2fde2 1431 'note': 'Embed allowed age-gate video',
2d3d2997 1432 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1433 'info_dict': {
1434 'id': 'HtVdAasjOgU',
1435 'ext': 'mp4',
1436 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1437 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1438 'duration': 142,
c522adb1 1439 'upload_date': '20140605',
34952f09 1440 'age_limit': 18,
976ae3ea 1441 'categories': ['Gaming'],
1442 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1443 'availability': 'needs_auth',
1444 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1445 'like_count': int,
1446 'channel': 'The Witcher',
1447 'live_status': 'not_live',
1448 'tags': 'count:17',
1449 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1450 'playable_in_embed': True,
1451 'view_count': int,
7666b936 1452 'channel_follower_count': int,
1453 'uploader': 'The Witcher',
1454 'uploader_url': 'https://www.youtube.com/@thewitcher',
1455 'uploader_id': '@thewitcher',
14a14335 1456 'comment_count': int,
8213ce28 1457 'channel_is_verified': True,
14a14335 1458 'heatmap': 'count:100',
96a134de 1459 'timestamp': 1401991663,
c522adb1
JMF
1460 },
1461 },
65c2fde2 1462 {
1463 'note': 'Age-gate video with embed allowed in public site',
1464 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1465 'info_dict': {
1466 'id': 'HsUATh_Nc2U',
1467 'ext': 'mp4',
1468 'title': 'Godzilla 2 (Official Video)',
1469 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1470 'upload_date': '20200408',
65c2fde2 1471 'age_limit': 18,
976ae3ea 1472 'availability': 'needs_auth',
1473 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
976ae3ea 1474 'channel': 'FlyingKitty',
1475 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1476 'view_count': int,
1477 'categories': ['Entertainment'],
1478 'live_status': 'not_live',
1479 'tags': ['Flyingkitty', 'godzilla 2'],
1480 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1481 'like_count': int,
1482 'duration': 177,
1483 'playable_in_embed': True,
7666b936 1484 'channel_follower_count': int,
1485 'uploader': 'FlyingKitty',
1486 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1487 'uploader_id': '@FlyingKitty900',
5caf30db 1488 'comment_count': int,
8213ce28 1489 'channel_is_verified': True,
65c2fde2 1490 },
1491 },
1492 {
1493 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1494 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1495 'info_dict': {
1496 'id': 'Tq92D6wQ1mg',
1497 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1498 'ext': 'mp4',
17322130 1499 'upload_date': '20191228',
65c2fde2 1500 'description': 'md5:17eccca93a786d51bc67646756894066',
1501 'age_limit': 18,
976ae3ea 1502 'like_count': int,
1503 'availability': 'needs_auth',
976ae3ea 1504 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1505 'view_count': int,
1506 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1507 'channel': 'Projekt Melody',
1508 'live_status': 'not_live',
1509 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1510 'playable_in_embed': True,
1511 'categories': ['Entertainment'],
1512 'duration': 106,
1513 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1514 'comment_count': int,
7666b936 1515 'channel_follower_count': int,
1516 'uploader': 'Projekt Melody',
1517 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1518 'uploader_id': '@ProjektMelody',
96a134de 1519 'timestamp': 1577508724,
65c2fde2 1520 },
1521 },
1522 {
1523 'note': 'Non-Agegated non-embeddable video',
1524 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1525 'info_dict': {
1526 'id': 'MeJVWBSsPAY',
1527 'ext': 'mp4',
1528 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
65c2fde2 1529 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1530 'upload_date': '20130730',
976ae3ea 1531 'track': 'Such mich find mich',
1532 'age_limit': 0,
1533 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1534 'like_count': int,
1535 'playable_in_embed': False,
1536 'creator': 'OOMPH!',
1537 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1538 'view_count': int,
1539 'alt_title': 'Such mich find mich',
1540 'duration': 210,
1541 'channel': 'Herr Lurik',
1542 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1543 'categories': ['Music'],
1544 'availability': 'public',
976ae3ea 1545 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1546 'live_status': 'not_live',
1547 'artist': 'OOMPH!',
7666b936 1548 'channel_follower_count': int,
1549 'uploader': 'Herr Lurik',
1550 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1551 'uploader_id': '@HerrLurik',
65c2fde2 1552 },
1553 },
1554 {
1555 'note': 'Non-bypassable age-gated video',
1556 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1557 'only_matching': True,
1558 },
8bdd16b4 1559 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1560 # YouTube Red ad is not captured for creator
1561 {
1562 'url': '__2ABJjxzNo',
1563 'info_dict': {
1564 'id': '__2ABJjxzNo',
1565 'ext': 'mp4',
1566 'duration': 266,
1567 'upload_date': '20100430',
545cc85d 1568 'creator': 'deadmau5',
1569 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1570 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1571 'alt_title': 'Some Chords',
976ae3ea 1572 'availability': 'public',
1573 'tags': 'count:14',
1574 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1575 'view_count': int,
1576 'live_status': 'not_live',
1577 'channel': 'deadmau5',
1578 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1579 'like_count': int,
1580 'track': 'Some Chords',
1581 'artist': 'deadmau5',
1582 'playable_in_embed': True,
1583 'age_limit': 0,
1584 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1585 'categories': ['Music'],
1586 'album': 'Some Chords',
7666b936 1587 'channel_follower_count': int,
1588 'uploader': 'deadmau5',
1589 'uploader_url': 'https://www.youtube.com/@deadmau5',
1590 'uploader_id': '@deadmau5',
8bdd16b4 1591 },
1592 'expected_warnings': [
1593 'DASH manifest missing',
add96eb9 1594 ],
8bdd16b4 1595 },
067aa17e 1596 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1597 {
1598 'url': 'lqQg6PlCWgI',
1599 'info_dict': {
1600 'id': 'lqQg6PlCWgI',
1601 'ext': 'mp4',
556dbe7f 1602 'duration': 6085,
90227264 1603 'upload_date': '20150827',
12a1b225 1604 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
cbe2bd91 1605 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1606 'like_count': int,
1607 'release_timestamp': 1343767800,
1608 'playable_in_embed': True,
1609 'categories': ['Sports'],
1610 'release_date': '20120731',
1611 'channel': 'Olympics',
1612 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1613 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1614 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1615 'age_limit': 0,
1616 'availability': 'public',
1617 'live_status': 'was_live',
1618 'view_count': int,
1619 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
7666b936 1620 'channel_follower_count': int,
1621 'uploader': 'Olympics',
1622 'uploader_url': 'https://www.youtube.com/@Olympics',
1623 'uploader_id': '@Olympics',
8213ce28 1624 'channel_is_verified': True,
96a134de 1625 'timestamp': 1440707674,
cbe2bd91
PH
1626 },
1627 'params': {
1628 'skip_download': 'requires avconv',
add96eb9 1629 },
cbe2bd91 1630 },
6271f1ca
PH
1631 # Non-square pixels
1632 {
1633 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1634 'info_dict': {
1635 'id': '_b-2C3KPAM0',
1636 'ext': 'mp4',
1637 'stretched_ratio': 16 / 9.,
556dbe7f 1638 'duration': 85,
6271f1ca 1639 'upload_date': '20110310',
6271f1ca 1640 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
6271f1ca 1641 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1642 'playable_in_embed': True,
1643 'channel': '孫ᄋᄅ',
1644 'age_limit': 0,
1645 'tags': 'count:11',
1646 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1647 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1648 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1649 'view_count': int,
1650 'categories': ['People & Blogs'],
1651 'like_count': int,
1652 'live_status': 'not_live',
1653 'availability': 'unlisted',
12a1b225 1654 'comment_count': int,
7666b936 1655 'channel_follower_count': int,
1656 'uploader': '孫ᄋᄅ',
1657 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1658 'uploader_id': '@AllenMeow',
96a134de 1659 'timestamp': 1299776999,
6271f1ca 1660 },
06b491eb
S
1661 },
1662 # url_encoded_fmt_stream_map is empty string
1663 {
1664 'url': 'qEJwOuvDf7I',
1665 'info_dict': {
1666 'id': 'qEJwOuvDf7I',
f57b7835 1667 'ext': 'webm',
06b491eb
S
1668 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1669 'description': '',
1670 'upload_date': '20150404',
06b491eb
S
1671 },
1672 'params': {
1673 'skip_download': 'requires avconv',
e323cf3f
S
1674 },
1675 'skip': 'This live event has ended.',
06b491eb 1676 },
067aa17e 1677 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1678 {
1679 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1680 'info_dict': {
1681 'id': 'FIl7x6_3R5Y',
eb6793ba 1682 'ext': 'webm',
da77d856
S
1683 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1684 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1685 'duration': 220,
da77d856 1686 'upload_date': '20150625',
eb6793ba 1687 'formats': 'mincount:31',
da77d856 1688 },
eb6793ba 1689 'skip': 'not actual anymore',
2ee8f5d8 1690 },
8a1a26ce
YCH
1691 # DASH manifest with segment_list
1692 {
1693 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1694 'md5': '8ce563a1d667b599d21064e982ab9e31',
1695 'info_dict': {
1696 'id': 'CsmdDsKjzN8',
1697 'ext': 'mp4',
17ee98e1 1698 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce 1699 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
8a1a26ce
YCH
1700 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1701 },
1702 'params': {
1703 'youtube_include_dash_manifest': True,
1704 'format': '135', # bestvideo
be49068d
S
1705 },
1706 'skip': 'This live event has ended.',
2ee8f5d8 1707 },
cf7e015f 1708 {
6368e2e6 1709 # Multifeed videos (multiple cameras), URL can be of any Camera
7666b936 1710 # TODO: fix multifeed titles
6368e2e6 1711 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1712 'info_dict': {
6368e2e6 1713 'id': 'zaPI8MvL8pg',
1714 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1715 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1716 },
1717 'playlist': [{
1718 'info_dict': {
6368e2e6 1719 'id': 'j5yGuxZ8lLU',
cf7e015f 1720 'ext': 'mp4',
6368e2e6 1721 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
6368e2e6 1722 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1723 'duration': 10120,
1724 'channel_follower_count': int,
1725 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1726 'availability': 'public',
1727 'playable_in_embed': True,
1728 'upload_date': '20131105',
6368e2e6 1729 'categories': ['Gaming'],
1730 'live_status': 'was_live',
1731 'tags': 'count:24',
1732 'release_timestamp': 1383701910,
1733 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1734 'comment_count': int,
1735 'age_limit': 0,
1736 'like_count': int,
1737 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1738 'channel': 'WiiLikeToPlay',
1739 'view_count': int,
1740 'release_date': '20131106',
7666b936 1741 'uploader': 'WiiLikeToPlay',
1742 'uploader_id': '@WLTP',
1743 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1744 },
1745 }, {
1746 'info_dict': {
6368e2e6 1747 'id': 'zaPI8MvL8pg',
cf7e015f 1748 'ext': 'mp4',
6368e2e6 1749 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
6368e2e6 1750 'availability': 'public',
1751 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1752 'channel': 'WiiLikeToPlay',
6368e2e6 1753 'channel_follower_count': int,
1754 'description': 'md5:563ccbc698b39298481ca3c571169519',
1755 'duration': 10108,
1756 'age_limit': 0,
1757 'like_count': int,
1758 'tags': 'count:24',
1759 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
6368e2e6 1760 'release_timestamp': 1383701915,
1761 'comment_count': int,
1762 'upload_date': '20131105',
1763 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1764 'release_date': '20131106',
1765 'playable_in_embed': True,
1766 'live_status': 'was_live',
1767 'categories': ['Gaming'],
1768 'view_count': int,
7666b936 1769 'uploader': 'WiiLikeToPlay',
1770 'uploader_id': '@WLTP',
1771 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1772 },
1773 }, {
1774 'info_dict': {
6368e2e6 1775 'id': 'R7r3vfO7Hao',
cf7e015f 1776 'ext': 'mp4',
6368e2e6 1777 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1778 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1779 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1780 'like_count': int,
1781 'availability': 'public',
1782 'playable_in_embed': True,
1783 'upload_date': '20131105',
1784 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1785 'channel_follower_count': int,
1786 'tags': 'count:24',
1787 'release_date': '20131106',
6368e2e6 1788 'comment_count': int,
1789 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1790 'channel': 'WiiLikeToPlay',
1791 'categories': ['Gaming'],
1792 'release_timestamp': 1383701914,
1793 'live_status': 'was_live',
1794 'age_limit': 0,
1795 'duration': 10128,
1796 'view_count': int,
7666b936 1797 'uploader': 'WiiLikeToPlay',
1798 'uploader_id': '@WLTP',
1799 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1800 },
1801 }],
6368e2e6 1802 'params': {'skip_download': True},
96a134de 1803 'skip': 'Not multifeed anymore',
cbaed4bb 1804 },
f9f49d87 1805 {
067aa17e 1806 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1807 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1808 'info_dict': {
1809 'id': 'gVfLd0zydlo',
1810 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1811 },
1812 'playlist_count': 2,
be49068d 1813 'skip': 'Not multifeed anymore',
f9f49d87 1814 },
cbaed4bb 1815 {
2d3d2997 1816 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1817 'only_matching': True,
0e49d9a6 1818 },
6d4fc66b 1819 {
2d3d2997 1820 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1821 'only_matching': True,
1822 },
0e49d9a6 1823 {
067aa17e 1824 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1825 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1826 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1827 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1828 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1829 'info_dict': {
1830 'id': 'lsguqyKfVQg',
1831 'ext': 'mp4',
1832 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1833 'alt_title': 'Dark Walk',
0e49d9a6 1834 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1835 'duration': 133,
0e49d9a6 1836 'upload_date': '20151119',
11f9be09 1837 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1838 'track': 'Dark Walk',
1839 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1840 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1841 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1842 'categories': ['Film & Animation'],
1843 'view_count': int,
1844 'live_status': 'not_live',
1845 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1846 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1847 'tags': 'count:13',
1848 'availability': 'public',
1849 'channel': 'IronSoulElf',
1850 'playable_in_embed': True,
1851 'like_count': int,
1852 'age_limit': 0,
add96eb9 1853 'channel_follower_count': int,
0e49d9a6
LL
1854 },
1855 'params': {
1856 'skip_download': True,
1857 },
1858 },
61f92af1 1859 {
067aa17e 1860 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1861 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1862 'only_matching': True,
1863 },
313dfc45
LL
1864 {
1865 # Video with yt:stretch=17:0
1866 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1867 'info_dict': {
1868 'id': 'Q39EVAstoRM',
1869 'ext': 'mp4',
1870 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1871 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1872 'upload_date': '20151107',
313dfc45
LL
1873 },
1874 'params': {
1875 'skip_download': True,
1876 },
be49068d 1877 'skip': 'This video does not exist.',
313dfc45 1878 },
201c1459 1879 {
1880 # Video with incomplete 'yt:stretch=16:'
1881 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1882 'only_matching': True,
1883 },
7caf9830
S
1884 {
1885 # Video licensed under Creative Commons
1886 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1887 'info_dict': {
1888 'id': 'M4gD1WSo5mA',
1889 'ext': 'mp4',
1890 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1891 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1892 'duration': 721,
17322130 1893 'upload_date': '20150128',
7caf9830 1894 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1895 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1896 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1897 'like_count': int,
1898 'age_limit': 0,
1899 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1900 'channel': 'The Berkman Klein Center for Internet & Society',
1901 'availability': 'public',
1902 'view_count': int,
1903 'categories': ['Education'],
1904 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1905 'live_status': 'not_live',
1906 'playable_in_embed': True,
d5d1df8a 1907 'channel_follower_count': int,
1908 'chapters': list,
7666b936 1909 'uploader': 'The Berkman Klein Center for Internet & Society',
1910 'uploader_id': '@BKCHarvard',
1911 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
96a134de 1912 'timestamp': 1422422076,
7caf9830
S
1913 },
1914 'params': {
1915 'skip_download': True,
1916 },
1917 },
fd050249 1918 {
fd050249
S
1919 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1920 'info_dict': {
1921 'id': 'eQcmzGIKrzg',
1922 'ext': 'mp4',
1923 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1924 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1925 'duration': 4060,
17322130 1926 'upload_date': '20151120',
fd050249 1927 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1928 'playable_in_embed': True,
1929 'tags': 'count:12',
1930 'like_count': int,
1931 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1932 'age_limit': 0,
1933 'availability': 'public',
1934 'categories': ['News & Politics'],
1935 'channel': 'Bernie Sanders',
1936 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1937 'view_count': int,
1938 'live_status': 'not_live',
1939 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1940 'comment_count': int,
d5d1df8a 1941 'channel_follower_count': int,
1942 'chapters': list,
7666b936 1943 'uploader': 'Bernie Sanders',
1944 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1945 'uploader_id': '@BernieSanders',
8213ce28 1946 'channel_is_verified': True,
14a14335 1947 'heatmap': 'count:100',
96a134de 1948 'timestamp': 1447987198,
fd050249
S
1949 },
1950 'params': {
1951 'skip_download': True,
1952 },
1953 },
040ac686
S
1954 {
1955 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1956 'only_matching': True,
7f29cf54
S
1957 },
1958 {
067aa17e 1959 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1960 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1961 'only_matching': True,
6496ccb4
S
1962 },
1963 {
1964 # Rental video preview
1965 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1966 'info_dict': {
1967 'id': 'uGpuVWrhIzE',
1968 'ext': 'mp4',
1969 'title': 'Piku - Trailer',
1970 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1971 'upload_date': '20150811',
6496ccb4
S
1972 'license': 'Standard YouTube License',
1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
eb6793ba 1977 'skip': 'This video is not available.',
022a5d66 1978 },
12afdc2a
S
1979 {
1980 # YouTube Red video with episode data
1981 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1982 'info_dict': {
1983 'id': 'iqKdEhx-dD4',
1984 'ext': 'mp4',
1985 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1986 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1987 'duration': 2085,
12afdc2a 1988 'upload_date': '20170118',
12afdc2a
S
1989 'series': 'Mind Field',
1990 'season_number': 1,
1991 'episode_number': 1,
976ae3ea 1992 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1993 'tags': 'count:12',
1994 'view_count': int,
1995 'availability': 'public',
1996 'age_limit': 0,
1997 'channel': 'Vsauce',
1998 'episode': 'Episode 1',
1999 'categories': ['Entertainment'],
2000 'season': 'Season 1',
2001 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2002 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2003 'like_count': int,
2004 'playable_in_embed': True,
2005 'live_status': 'not_live',
7666b936 2006 'channel_follower_count': int,
2007 'uploader': 'Vsauce',
2008 'uploader_url': 'https://www.youtube.com/@Vsauce',
2009 'uploader_id': '@Vsauce',
14a14335 2010 'comment_count': int,
8213ce28 2011 'channel_is_verified': True,
96a134de 2012 'timestamp': 1484761047,
12afdc2a
S
2013 },
2014 'params': {
2015 'skip_download': True,
2016 },
2017 'expected_warnings': [
2018 'Skipping DASH manifest',
2019 ],
2020 },
c7121fa7
S
2021 {
2022 # The following content has been identified by the YouTube community
2023 # as inappropriate or offensive to some audiences.
2024 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2025 'info_dict': {
2026 'id': '6SJNVb0GnPI',
2027 'ext': 'mp4',
2028 'title': 'Race Differences in Intelligence',
2029 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2030 'duration': 965,
2031 'upload_date': '20140124',
c7121fa7
S
2032 },
2033 'params': {
2034 'skip_download': True,
2035 },
545cc85d 2036 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 2037 },
022a5d66
S
2038 {
2039 # itag 212
2040 'url': '1t24XAntNCY',
2041 'only_matching': True,
fd5c4aab
S
2042 },
2043 {
2044 # geo restricted to JP
2045 'url': 'sJL6WA-aGkQ',
2046 'only_matching': True,
2047 },
cd5a74a2
S
2048 {
2049 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2050 'only_matching': True,
2051 },
bc2ca1bb 2052 {
2053 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2054 'only_matching': True,
2055 },
2056 {
2057 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2058 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2059 'only_matching': True,
2060 },
825cd268
RA
2061 {
2062 # DRM protected
2063 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2064 'only_matching': True,
4fe54c12
S
2065 },
2066 {
2067 # Video with unsupported adaptive stream type formats
2068 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2069 'info_dict': {
2070 'id': 'Z4Vy8R84T1U',
2071 'ext': 'mp4',
2072 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2073 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2074 'duration': 433,
2075 'upload_date': '20130923',
4fe54c12
S
2076 'formats': 'maxcount:10',
2077 },
2078 'params': {
2079 'skip_download': True,
2080 'youtube_include_dash_manifest': False,
2081 },
5429d6a9 2082 'skip': 'not actual anymore',
5caabd3c 2083 },
2084 {
822b9d9c 2085 # YouTube Music Auto-generated description
7666b936 2086 # TODO: fix metadata extraction
5caabd3c 2087 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2088 'info_dict': {
2089 'id': 'MgNrAu2pzNs',
2090 'ext': 'mp4',
2091 'title': 'Voyeur Girl',
2092 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2093 'upload_date': '20190312',
104a7b5a
L
2094 'artists': ['Stephen'],
2095 'creators': ['Stephen'],
5caabd3c 2096 'track': 'Voyeur Girl',
2097 'album': 'it\'s too much love to know my dear',
2098 'release_date': '20190313',
976ae3ea 2099 'alt_title': 'Voyeur Girl',
2100 'view_count': int,
976ae3ea 2101 'playable_in_embed': True,
2102 'like_count': int,
2103 'categories': ['Music'],
2104 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
7666b936 2105 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2106 'uploader': 'Stephen',
976ae3ea 2107 'availability': 'public',
976ae3ea 2108 'duration': 169,
2109 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2110 'age_limit': 0,
2111 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2112 'tags': 'count:11',
2113 'live_status': 'not_live',
add96eb9 2114 'channel_follower_count': int,
5caabd3c 2115 },
2116 'params': {
2117 'skip_download': True,
2118 },
2119 },
66b48727
RA
2120 {
2121 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2122 'only_matching': True,
2123 },
011e75e6
S
2124 {
2125 # invalid -> valid video id redirection
2126 'url': 'DJztXj2GPfl',
2127 'info_dict': {
2128 'id': 'DJztXj2GPfk',
2129 'ext': 'mp4',
2130 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2131 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2132 'upload_date': '20090125',
011e75e6
S
2133 'artist': 'Panjabi MC',
2134 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2135 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2136 },
2137 'params': {
2138 'skip_download': True,
2139 },
545cc85d 2140 'skip': 'Video unavailable',
ea74e00b
DP
2141 },
2142 {
2143 # empty description results in an empty string
2144 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2145 'info_dict': {
2146 'id': 'x41yOUIvK2k',
2147 'ext': 'mp4',
2148 'title': 'IMG 3456',
2149 'description': '',
2150 'upload_date': '20170613',
976ae3ea 2151 'view_count': int,
2152 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
976ae3ea 2153 'like_count': int,
2154 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2155 'tags': [],
2156 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2157 'availability': 'public',
2158 'age_limit': 0,
2159 'categories': ['Pets & Animals'],
2160 'duration': 7,
2161 'playable_in_embed': True,
2162 'live_status': 'not_live',
7666b936 2163 'channel': 'l\'Or Vert asbl',
2164 'channel_follower_count': int,
2165 'uploader': 'l\'Or Vert asbl',
2166 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2167 'uploader_id': '@ElevageOrVert',
96a134de 2168 'timestamp': 1497343210,
ea74e00b
DP
2169 },
2170 'params': {
2171 'skip_download': True,
2172 },
2173 },
a0566bbf 2174 {
29f7c58a 2175 # with '};' inside yt initial data (see [1])
2176 # see [2] for an example with '};' inside ytInitialPlayerResponse
2177 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2178 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2179 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2180 'info_dict': {
2181 'id': 'CHqg6qOn4no',
2182 'ext': 'mp4',
2183 'title': 'Part 77 Sort a list of simple types in c#',
2184 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2185 'upload_date': '20130831',
976ae3ea 2186 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2187 'like_count': int,
976ae3ea 2188 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2189 'live_status': 'not_live',
2190 'categories': ['Education'],
2191 'availability': 'public',
2192 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2193 'tags': 'count:12',
2194 'playable_in_embed': True,
2195 'age_limit': 0,
2196 'view_count': int,
2197 'duration': 522,
2198 'channel': 'kudvenkat',
12a1b225 2199 'comment_count': int,
d5d1df8a 2200 'channel_follower_count': int,
2201 'chapters': list,
7666b936 2202 'uploader': 'kudvenkat',
2203 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2204 'uploader_id': '@Csharp-video-tutorialsBlogspot',
8213ce28 2205 'channel_is_verified': True,
14a14335 2206 'heatmap': 'count:100',
96a134de 2207 'timestamp': 1377976349,
a0566bbf 2208 },
2209 'params': {
2210 'skip_download': True,
2211 },
2212 },
29f7c58a 2213 {
2214 # another example of '};' in ytInitialData
2215 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2216 'only_matching': True,
2217 },
2218 {
2219 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2220 'only_matching': True,
2221 },
545cc85d 2222 {
cc2db878 2223 # https://github.com/ytdl-org/youtube-dl/pull/28094
2224 'url': 'OtqTfy26tG0',
2225 'info_dict': {
2226 'id': 'OtqTfy26tG0',
2227 'ext': 'mp4',
2228 'title': 'Burn Out',
2229 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2230 'upload_date': '20141120',
cc2db878 2231 'artist': 'The Cinematic Orchestra',
2232 'track': 'Burn Out',
2233 'album': 'Every Day',
976ae3ea 2234 'like_count': int,
2235 'live_status': 'not_live',
2236 'alt_title': 'Burn Out',
2237 'duration': 614,
2238 'age_limit': 0,
2239 'view_count': int,
2240 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2241 'creator': 'The Cinematic Orchestra',
2242 'channel': 'The Cinematic Orchestra',
2243 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2244 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2245 'availability': 'public',
2246 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2247 'categories': ['Music'],
2248 'playable_in_embed': True,
7666b936 2249 'channel_follower_count': int,
2250 'uploader': 'The Cinematic Orchestra',
2251 'comment_count': int,
cc2db878 2252 },
2253 'params': {
2254 'skip_download': True,
2255 },
545cc85d 2256 },
bc2ca1bb 2257 {
2258 # controversial video, only works with bpctr when authenticated with cookies
2259 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2260 'only_matching': True,
2261 },
a1a7907b 2262 {
2263 # controversial video, requires bpctr/contentCheckOk
2264 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2265 'info_dict': {
2266 'id': 'SZJvDhaSDnc',
2267 'ext': 'mp4',
2268 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2269 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
a1a7907b 2270 'upload_date': '20140716',
976ae3ea 2271 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2272 'duration': 170,
2273 'categories': ['News & Politics'],
976ae3ea 2274 'view_count': int,
2275 'channel': 'CBS Mornings',
2276 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2277 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2278 'age_limit': 18,
2279 'availability': 'needs_auth',
2280 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2281 'like_count': int,
2282 'live_status': 'not_live',
2283 'playable_in_embed': True,
7666b936 2284 'channel_follower_count': int,
2285 'uploader': 'CBS Mornings',
2286 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2287 'uploader_id': '@CBSMornings',
14a14335 2288 'comment_count': int,
8213ce28 2289 'channel_is_verified': True,
96a134de 2290 'timestamp': 1405513526,
add96eb9 2291 },
a1a7907b 2292 },
f7ad7160 2293 {
2294 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2295 'url': 'cBvYw8_A0vQ',
2296 'info_dict': {
2297 'id': 'cBvYw8_A0vQ',
2298 'ext': 'mp4',
2299 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2300 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2301 'upload_date': '20201120',
976ae3ea 2302 'duration': 1456,
2303 'categories': ['Travel & Events'],
2304 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2305 'view_count': int,
2306 'channel': 'Walk around Japan',
2307 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
96a134de 2308 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
976ae3ea 2309 'age_limit': 0,
2310 'availability': 'public',
2311 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2312 'live_status': 'not_live',
2313 'playable_in_embed': True,
7666b936 2314 'channel_follower_count': int,
2315 'uploader': 'Walk around Japan',
2316 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2317 'uploader_id': '@walkaroundjapan7124',
96a134de 2318 'timestamp': 1605884416,
f7ad7160 2319 },
2320 'params': {
2321 'skip_download': True,
2322 },
0fb983f6 2323 }, {
2324 # Has multiple audio streams
2325 'url': 'WaOKSUlf4TM',
add96eb9 2326 'only_matching': True,
9297939e 2327 }, {
2328 # Requires Premium: has format 141 when requested using YTM url
2329 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
add96eb9 2330 'only_matching': True,
9297939e 2331 }, {
120916da 2332 # multiple subtitles with same lang_code
2333 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2334 'only_matching': True,
109dd3b2 2335 }, {
2336 # Force use android client fallback
2337 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2338 'info_dict': {
2339 'id': 'YOelRv7fMxY',
11f9be09 2340 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2341 'ext': '3gp',
2342 'upload_date': '20210624',
2343 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
109dd3b2 2344 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2345 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2346 'duration': 596,
2347 'categories': ['Entertainment'],
976ae3ea 2348 'view_count': int,
2349 'channel': 'colinfurze',
2350 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2351 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2352 'age_limit': 0,
2353 'availability': 'public',
2354 'like_count': int,
2355 'live_status': 'not_live',
2356 'playable_in_embed': True,
d5d1df8a 2357 'channel_follower_count': int,
2358 'chapters': list,
7666b936 2359 'uploader': 'colinfurze',
2360 'uploader_url': 'https://www.youtube.com/@colinfurze',
2361 'uploader_id': '@colinfurze',
14a14335 2362 'comment_count': int,
8213ce28 2363 'channel_is_verified': True,
14a14335 2364 'heatmap': 'count:100',
109dd3b2 2365 },
2366 'params': {
2367 'format': '17', # 3gp format available on android
2368 'extractor_args': {'youtube': {'player_client': ['android']}},
2369 },
12d8ea82 2370 'skip': 'android client broken',
120916da 2371 },
109dd3b2 2372 {
2373 # Skip download of additional client configs (remix client config in this case)
2374 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2375 'only_matching': True,
2376 'params': {
2377 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2378 },
8fc54b12 2379 }, {
2380 # shorts
2381 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2382 'only_matching': True,
9222c381 2383 }, {
2384 'note': 'Storyboards',
2385 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2386 'info_dict': {
2387 'id': '5KLPxDtMqe8',
2388 'ext': 'mhtml',
2389 'format_id': 'sb0',
2390 'title': 'Your Brain is Plastic',
9222c381 2391 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2392 'upload_date': '20140324',
976ae3ea 2393 'like_count': int,
2394 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2395 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2396 'view_count': int,
2397 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2398 'playable_in_embed': True,
2399 'tags': 'count:12',
976ae3ea 2400 'availability': 'public',
2401 'channel': 'SciShow',
2402 'live_status': 'not_live',
2403 'duration': 248,
2404 'categories': ['Education'],
2405 'age_limit': 0,
d5d1df8a 2406 'channel_follower_count': int,
2407 'chapters': list,
7666b936 2408 'uploader': 'SciShow',
2409 'uploader_url': 'https://www.youtube.com/@SciShow',
2410 'uploader_id': '@SciShow',
14a14335 2411 'comment_count': int,
8213ce28 2412 'channel_is_verified': True,
14a14335 2413 'heatmap': 'count:100',
96a134de 2414 'timestamp': 1395685455,
add96eb9 2415 }, 'params': {'format': 'mhtml', 'skip_download': True},
992f9a73 2416 }, {
2417 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2418 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2419 'info_dict': {
2420 'id': '2NUZ8W2llS4',
2421 'ext': 'mp4',
2422 'title': 'The NP that test your phone performance 🙂',
2423 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
992f9a73 2424 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2425 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2426 'duration': 21,
2427 'view_count': int,
2428 'age_limit': 0,
2429 'categories': ['Gaming'],
2430 'tags': 'count:23',
2431 'playable_in_embed': True,
2432 'live_status': 'not_live',
2433 'upload_date': '20220103',
2434 'like_count': int,
2435 'availability': 'public',
2436 'channel': 'Leon Nguyen',
2437 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2438 'comment_count': int,
7666b936 2439 'channel_follower_count': int,
2440 'uploader': 'Leon Nguyen',
2441 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2442 'uploader_id': '@LeonNguyen',
14a14335 2443 'heatmap': 'count:100',
96a134de 2444 'timestamp': 1641170939,
add96eb9 2445 },
992f9a73 2446 }, {
2447 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2448 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2449 'info_dict': {
2450 'id': 'mzZzzBU6lrM',
2451 'ext': 'mp4',
2452 'title': 'I Met GeorgeNotFound In Real Life...',
7666b936 2453 'description': 'md5:978296ec9783a031738b684d4ebf302d',
992f9a73 2454 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2455 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2456 'duration': 955,
2457 'view_count': int,
2458 'age_limit': 0,
2459 'categories': ['Entertainment'],
2460 'tags': 'count:26',
2461 'playable_in_embed': True,
2462 'live_status': 'not_live',
2463 'release_timestamp': 1641172509,
2464 'release_date': '20220103',
2465 'upload_date': '20220103',
2466 'like_count': int,
2467 'availability': 'public',
2468 'channel': 'Quackity',
2469 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
7666b936 2470 'channel_follower_count': int,
2471 'uploader': 'Quackity',
2472 'uploader_id': '@Quackity',
2473 'uploader_url': 'https://www.youtube.com/@Quackity',
14a14335 2474 'comment_count': int,
8213ce28 2475 'channel_is_verified': True,
14a14335 2476 'heatmap': 'count:100',
96a134de 2477 'timestamp': 1641172509,
add96eb9 2478 },
992f9a73 2479 },
96a134de 2480 { # continuous livestream.
2481 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2482 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
992f9a73 2483 'info_dict': {
96a134de 2484 'id': 'jfKfPfyJRdk',
992f9a73 2485 'ext': 'mp4',
96a134de 2486 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2487 'like_count': int,
2488 'uploader': 'Lofi Girl',
2489 'categories': ['Music'],
2490 'concurrent_view_count': int,
2491 'playable_in_embed': True,
2492 'timestamp': 1657627949,
2493 'release_date': '20220712',
2494 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2495 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
992f9a73 2496 'age_limit': 0,
96a134de 2497 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2498 'release_timestamp': 1657641570,
2499 'uploader_url': 'https://www.youtube.com/@LofiGirl',
992f9a73 2500 'channel_follower_count': int,
96a134de 2501 'channel_is_verified': True,
2502 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
992f9a73 2503 'view_count': int,
96a134de 2504 'live_status': 'is_live',
2505 'tags': 'count:32',
2506 'channel': 'Lofi Girl',
2507 'availability': 'public',
2508 'upload_date': '20220712',
2509 'uploader_id': '@LofiGirl',
992f9a73 2510 },
96a134de 2511 'params': {'skip_download': True},
ee27297f 2512 }, {
2513 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2514 'info_dict': {
2515 'id': 'tjjjtzRLHvA',
2516 'ext': 'mp4',
2517 'title': 'ハッシュタグ無し };if window.ytcsi',
2518 'upload_date': '20220323',
2519 'like_count': int,
2520 'availability': 'unlisted',
7666b936 2521 'channel': 'Lesmiscore',
2522 'thumbnail': r're:^https?://.*\.jpg',
ee27297f 2523 'age_limit': 0,
ee27297f 2524 'categories': ['Music'],
6e634cbe 2525 'view_count': int,
2526 'description': '',
ee27297f 2527 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2528 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2529 'live_status': 'not_live',
2530 'playable_in_embed': True,
2531 'channel_follower_count': int,
2532 'duration': 6,
2533 'tags': [],
7666b936 2534 'uploader_id': '@lesmiscore',
2535 'uploader': 'Lesmiscore',
2536 'uploader_url': 'https://www.youtube.com/@lesmiscore',
96a134de 2537 'timestamp': 1648005313,
add96eb9 2538 },
c26f9b99 2539 }, {
2540 # Prefer primary title+description language metadata by default
2541 # Do not prefer translated description if primary is empty
2542 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2543 'info_dict': {
2544 'id': 'el3E4MbxRqQ',
2545 'ext': 'mp4',
2546 'title': 'dlp test video 2 - primary sv no desc',
2547 'description': '',
2548 'channel': 'cole-dlp-test-acc',
2549 'tags': [],
2550 'view_count': int,
2551 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2552 'like_count': int,
2553 'playable_in_embed': True,
2554 'availability': 'unlisted',
7666b936 2555 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2556 'age_limit': 0,
2557 'duration': 5,
c26f9b99 2558 'live_status': 'not_live',
2559 'upload_date': '20220908',
2560 'categories': ['People & Blogs'],
c26f9b99 2561 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
7666b936 2562 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2563 'uploader_id': '@coletdjnz',
2564 'uploader': 'cole-dlp-test-acc',
96a134de 2565 'timestamp': 1662677394,
c26f9b99 2566 },
add96eb9 2567 'params': {'skip_download': True},
c26f9b99 2568 }, {
2569 # Extractor argument: prefer translated title+description
2570 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2571 'info_dict': {
2572 'id': 'gHKT4uU8Zng',
2573 'ext': 'mp4',
2574 'channel': 'cole-dlp-test-acc',
2575 'tags': [],
2576 'duration': 5,
2577 'live_status': 'not_live',
2578 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
96a134de 2579 'upload_date': '20220729',
c26f9b99 2580 'view_count': int,
2581 'categories': ['People & Blogs'],
7666b936 2582 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2583 'title': 'dlp test video title translated (fr)',
2584 'availability': 'public',
c26f9b99 2585 'age_limit': 0,
2586 'description': 'dlp test video description translated (fr)',
2587 'playable_in_embed': True,
2588 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
7666b936 2589 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2590 'uploader_id': '@coletdjnz',
2591 'uploader': 'cole-dlp-test-acc',
96a134de 2592 'timestamp': 1659073275,
2593 'like_count': int,
c26f9b99 2594 },
2595 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2596 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2597 }, {
2598 'note': '6 channel audio',
2599 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2600 'only_matching': True,
a4894d3e 2601 }, {
2602 'note': 'Multiple HLS formats with same itag',
2603 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2604 'info_dict': {
2605 'id': 'kX3nB4PpJko',
2606 'ext': 'mp4',
2607 'categories': ['Entertainment'],
2608 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
a4894d3e 2609 'live_status': 'not_live',
2610 'duration': 937,
2611 'channel_follower_count': int,
2612 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2613 'title': 'Last To Take Hand Off Jet, Keeps It!',
2614 'channel': 'MrBeast',
2615 'playable_in_embed': True,
2616 'view_count': int,
2617 'upload_date': '20221112',
a4894d3e 2618 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2619 'age_limit': 0,
2620 'availability': 'public',
2621 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2622 'like_count': int,
2623 'tags': [],
7666b936 2624 'uploader': 'MrBeast',
2625 'uploader_url': 'https://www.youtube.com/@MrBeast',
2626 'uploader_id': '@MrBeast',
14a14335 2627 'comment_count': int,
8213ce28 2628 'channel_is_verified': True,
14a14335 2629 'heatmap': 'count:100',
a4894d3e 2630 },
2631 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2632 }, {
2633 'note': 'Audio formats with Dynamic Range Compression',
2634 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2635 'info_dict': {
2636 'id': 'Tq92D6wQ1mg',
7666b936 2637 'ext': 'webm',
9bb85699 2638 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2639 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2640 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2641 'channel_follower_count': int,
2642 'description': 'md5:17eccca93a786d51bc67646756894066',
2643 'upload_date': '20191228',
9bb85699 2644 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2645 'playable_in_embed': True,
2646 'like_count': int,
2647 'categories': ['Entertainment'],
2648 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2649 'age_limit': 18,
2650 'channel': 'Projekt Melody',
9bb85699 2651 'view_count': int,
2652 'availability': 'needs_auth',
2653 'comment_count': int,
2654 'live_status': 'not_live',
9bb85699 2655 'duration': 106,
7666b936 2656 'uploader': 'Projekt Melody',
2657 'uploader_id': '@ProjektMelody',
2658 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
96a134de 2659 'timestamp': 1577508724,
9bb85699 2660 },
2661 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2662 },
2663 {
2664 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2665 'info_dict': {
2666 'id': 'qVv6vCqciTM',
2667 'ext': 'mp4',
2668 'age_limit': 0,
dad2210c 2669 'comment_count': int,
2670 'chapters': 'count:13',
2671 'upload_date': '20221223',
2672 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2673 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
dad2210c 2674 'like_count': int,
2675 'release_date': '20221223',
2676 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2677 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2678 'view_count': int,
2679 'playable_in_embed': True,
2680 'duration': 4438,
2681 'availability': 'public',
2682 'channel_follower_count': int,
2683 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2684 'categories': ['Entertainment'],
2685 'live_status': 'was_live',
2686 'release_timestamp': 1671793345,
2687 'channel': 'さなちゃんねる',
2688 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2689 'uploader': 'さなちゃんねる',
7666b936 2690 'uploader_url': 'https://www.youtube.com/@sana_natori',
2691 'uploader_id': '@sana_natori',
8213ce28 2692 'channel_is_verified': True,
14a14335 2693 'heatmap': 'count:100',
96a134de 2694 'timestamp': 1671798112,
7666b936 2695 },
2696 },
2697 {
2698 # Fallbacks when webpage and web client is unavailable
2699 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2700 'info_dict': {
2701 'id': 'wSSmNUl9Snw',
2702 'ext': 'mp4',
2703 # 'categories': ['Science & Technology'],
2704 'view_count': int,
2705 'chapters': 'count:2',
2706 'channel': 'Scott Manley',
2707 'like_count': int,
2708 'age_limit': 0,
2709 # 'availability': 'public',
2710 'channel_follower_count': int,
2711 'live_status': 'not_live',
2712 'upload_date': '20170831',
2713 'duration': 682,
2714 'tags': 'count:8',
2715 'uploader_url': 'https://www.youtube.com/@scottmanley',
2716 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2717 'uploader': 'Scott Manley',
2718 'uploader_id': '@scottmanley',
2719 'title': 'The Computer Hack That Saved Apollo 14',
2720 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2721 'thumbnail': r're:^https?://.*\.webp',
2722 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2723 'playable_in_embed': True,
14a14335 2724 'comment_count': int,
8213ce28 2725 'channel_is_verified': True,
14a14335 2726 'heatmap': 'count:100',
7666b936 2727 },
2728 'params': {
12d8ea82 2729 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
dad2210c 2730 },
2731 },
2eb88d95
PH
2732 ]
2733
# Webpage test cases: 'url' is a third-party page, while the expected
# info_dict describes the YouTube video referenced from it.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this is the same hash as the description md5 below, which
        # looks like a copy-paste; confirm what checksum was intended here
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
            'timestamp': 1211825920,
        },
        'params': {
            'skip_download': True,
        },
    },
]
2771
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty `list` query parameter; otherwise defer to the parent class"""
    # NOTE(review): parse_qs is imported locally although the module also imports it;
    # presumably so this method stays self-contained if copied elsewhere - confirm before removing
    from ..utils import parse_qs

    first_list_value = parse_qs(url).get('list', [None])[0]
    return False if first_list_value else super().suitable(url)
201c1459 2780
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create the empty per-instance caches"""
    super().__init__(*args, **kwargs)
    # _code_cache maps a player id to the downloaded player code (see _load_player)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2785
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach fragment generators to the formats flagged with 'is_from_start'

    The matching format dicts are modified in place and nothing is returned.
    While is_live, 'fragments' is set to the generator function itself and the
    protocol to 'http_dash_segments_generator'; otherwise the fragments are
    wrapped in a LazyList and the 'is_from_start' flag is removed.
    """
    # Guards refetch_manifest, which rebinds the shared closure state below
    # (presumably mpd_feed is called from several download threads - confirm)
    lock = threading.Lock()
    # Time of the last (re)fetch; refetch_manifest measures `delay` from here
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses and re-list the formats, unless fetched within the last `delay` seconds"""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        # Replaces `formats` with the list from _list_formats as-is;
        # the 'is_from_start' filter above is not re-applied here
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # Setting retry.error and continuing asks the RetryManager for another attempt
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The last bound argument (manifestless_orig_fmt) is False while live,
        # otherwise a copy of the format dict taken before it is modified further
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Not live: call the generator right away with an empty ctx and expose it lazily
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
adbc4ec4 2833
4d37720a 2834 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
adbc4ec4
THD
2835 FETCH_SPAN, MAX_DURATION = 5, 432000
2836
2837 mpd_url, stream_number, is_live = None, None, True
2838
2839 begin_index = 0
2840 download_start_time = ctx.get('start') or time.time()
2841
2842 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2843 if lack_early_segments:
2844 self.report_warning(bug_reports_message(
2845 'Starting download from the last 120 hours of the live stream since '
2846 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2847 lack_early_segments = True
2848
2849 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2850 fragments, fragment_base_url = None, None
2851
a539f065 2852 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2853 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2854 # Obtain from MPD's maximum seq value
2855 old_mpd_url = mpd_url
185bf310 2856 last_error = ctx.pop('last_error', None)
3d2623a8 2857 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
185bf310 2858 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2859 or (mpd_url, stream_number, False))
2860 if not refresh_sequence:
2861 if expire_fast and not is_live:
2862 return False, last_seq
2863 elif old_mpd_url == mpd_url:
2864 return True, last_seq
4d37720a
L
2865 if manifestless_orig_fmt:
2866 fmt_info = manifestless_orig_fmt
2867 else:
2868 try:
2869 fmts, _ = self._extract_mpd_formats_and_subtitles(
2870 mpd_url, None, note=False, errnote=False, fatal=False)
2871 except ExtractorError:
2872 fmts = None
2873 if not fmts:
2874 no_fragment_score += 2
2875 return False, last_seq
2876 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
adbc4ec4
THD
2877 fragments = fmt_info['fragments']
2878 fragment_base_url = fmt_info['fragment_base_url']
2879 assert fragment_base_url
2880
2881 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2882 return True, _last_seq
2883
4d37720a 2884 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
adbc4ec4
THD
2885 while is_live:
2886 fetch_time = time.time()
2887 if no_fragment_score > 30:
2888 return
2889 if last_segment_url:
2890 # Obtain from "X-Head-Seqnum" header value from each segment
2891 try:
2892 urlh = self._request_webpage(
2893 last_segment_url, None, note=False, errnote=False, fatal=False)
2894 except ExtractorError:
2895 urlh = None
2896 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2897 if last_seq is None:
a539f065 2898 no_fragment_score += 2
adbc4ec4
THD
2899 last_segment_url = None
2900 continue
2901 else:
a539f065
LNO
2902 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2903 no_fragment_score += 2
185bf310 2904 if not should_continue:
adbc4ec4
THD
2905 continue
2906
2907 if known_idx > last_seq:
2908 last_segment_url = None
2909 continue
2910
2911 last_seq += 1
2912
2913 if begin_index < 0 and known_idx < 0:
2914 # skip from the start when it's negative value
2915 known_idx = last_seq + begin_index
2916 if lack_early_segments:
2917 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2918 try:
2919 for idx in range(known_idx, last_seq):
2920 # do not update sequence here or you'll get skipped some part of it
a539f065 2921 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2922 if not should_continue:
adbc4ec4
THD
2923 known_idx = idx - 1
2924 raise ExtractorError('breaking out of outer loop')
add96eb9 2925 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
adbc4ec4
THD
2926 yield {
2927 'url': last_segment_url,
36195c44 2928 'fragment_count': last_seq,
adbc4ec4
THD
2929 }
2930 if known_idx == last_seq:
2931 no_fragment_score += 5
2932 else:
2933 no_fragment_score = 0
2934 known_idx = last_seq
2935 except ExtractorError:
2936 continue
2937
4d37720a
L
2938 if manifestless_orig_fmt:
2939 # Stop at the first iteration if running for post-live manifestless;
2940 # fragment count no longer increase since it starts
2941 break
2942
adbc4ec4
THD
2943 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2944
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Find the player JS URL in the given ytcfg dicts

    @returns    The URL joined onto https://www.youtube.com, or None if no ytcfg has one
    """
    js_path = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
109dd3b2 2952
b6de707d 2953 def _download_player_url(self, video_id, fatal=False):
2954 res = self._download_webpage(
2955 'https://www.youtube.com/iframe_api',
2956 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2957 if res:
2958 player_version = self._search_regex(
2959 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2960 if player_version:
2961 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2962
60064c53
PH
2963 def _signature_cache_id(self, example_sig):
2964 """ Return a string representation of a signature """
14f25df2 2965 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2966
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Extract the player id from a player URL

    The patterns in cls._PLAYER_INFO_RE are tried in order and the 'id' group
    of the first one that matches is returned.

    @raises ExtractorError  if none of the patterns match
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # The generator is lazy, so searching stops at the first pattern that matches
    id_match = next(filter(None, attempts), None)
    if id_match is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return id_match.group('id')
e40c758c 2976
404f611f 2977 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2978 player_id = self._extract_player_info(player_url)
2979 if player_id not in self._code_cache:
1276a43a 2980 code = self._download_webpage(
109dd3b2 2981 player_url, video_id, fatal=fatal,
2982 note='Downloading player ' + player_id,
add96eb9 2983 errnote=f'Download of {player_url} failed')
1276a43a 2984 if code:
2985 self._code_cache[player_id] = code
404f611f 2986 return self._code_cache.get(player_id)
109dd3b2 2987
e40c758c 2988 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2989 player_id = self._extract_player_info(player_url)
e0df6211 2990
c4417ddb 2991 # Read from filesystem cache
86e5f3ed 2992 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2993 assert os.path.basename(func_id) == func_id
a0e07d31 2994
ae61d108 2995 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2996 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2997
580ce007 2998 if not cache_spec:
2999 code = self._load_player(video_id, player_url)
404f611f 3000 if code:
109dd3b2 3001 res = self._parse_sig_js(code)
ac668111 3002 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 3003 cache_spec = [ord(c) for c in res(test_string)]
9809740b 3004 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 3005
3006 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 3007
60064c53 3008 def _print_sig_code(self, func, example_sig):
404f611f 3009 if not self.get_param('youtube_print_sig_code'):
3010 return
3011
edf3e38e
PH
3012 def gen_sig_code(idxs):
3013 def _genslice(start, end, step):
78caa52a 3014 starts = '' if start == 0 else str(start)
8bcc8756 3015 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 3016 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 3017 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
3018
3019 step = None
7af808a5
PH
3020 # Quelch pyflakes warnings - start will be set when step is set
3021 start = '(Never used)'
edf3e38e
PH
3022 for i, prev in zip(idxs[1:], idxs[:-1]):
3023 if step is not None:
3024 if i - prev == step:
3025 continue
3026 yield _genslice(start, prev, step)
3027 step = None
3028 continue
3029 if i - prev in [-1, 1]:
3030 step = i - prev
3031 start = prev
3032 continue
3033 else:
78caa52a 3034 yield 's[%d]' % prev
edf3e38e 3035 if step is None:
78caa52a 3036 yield 's[%d]' % i
edf3e38e
PH
3037 else:
3038 yield _genslice(start, i, step)
3039
ac668111 3040 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 3041 cache_res = func(test_string)
edf3e38e 3042 cache_spec = [ord(c) for c in cache_res]
78caa52a 3043 expr_code = ' + '.join(gen_sig_code(cache_spec))
add96eb9 3044 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3045 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3046 f' return {expr_code}\n')
69ea8ca4 3047 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 3048
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    The function name is found by trying the patterns below in order; the
    function is then extracted with JSInterpreter. Returns a callable that
    takes the scrambled signature string and passes it to the JS function
    as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    sig_function = interpreter.extract_function(funcname)

    def run_sig_function(s):
        # The interpreted JS function takes its arguments as a list
        return sig_function([s])

    return run_sig_function
3069
580ce007 3070 def _cached(self, func, *cache_id):
3071 def inner(*args, **kwargs):
3072 if cache_id not in self._player_cache:
3073 try:
3074 self._player_cache[cache_id] = func(*args, **kwargs)
3075 except ExtractorError as e:
3076 self._player_cache[cache_id] = e
3077 except Exception as e:
3078 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3079
3080 ret = self._player_cache[cache_id]
3081 if isinstance(ret, Exception):
3082 raise ret
3083 return ret
3084 return inner
3085
545cc85d 3086 def _decrypt_signature(self, s, video_id, player_url):
257a2501 3087 """Turn the encrypted s field into a working signature"""
580ce007 3088 extract_sig = self._cached(
3089 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3090 func = extract_sig(video_id, player_url, s)
3091 self._print_sig_code(func, s)
3092 return func(s)
404f611f 3093
def _decrypt_nsig(self, s, video_id, player_url):
    """Transform the encrypted ``n`` field using the player's n function.

    The function code is run with the native JS interpreter first. If that
    raises JSInterpreter.Exception, the same code is executed through
    PhantomJS; when the PhantomJS wrapper cannot be created, the original
    interpreter error is re-raised.

    Raises ExtractorError if ``player_url`` is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig_func(jsi, func_code)
        result = nsig_func(s)
    except JSInterpreter.Exception as e:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        result = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {result}')
    return result
3127
90a1df30 3128 def _extract_n_function_name(self, jscode):
3129 funcname, idx = self._search_regex(
3130 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3131 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3132 if not idx:
3133 return funcname
3134
3135 return json.loads(js_to_json(self._search_regex(
337734d4 3136 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
90a1df30 3137 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3138
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    ``func_code`` is an (argument names, function body) pair. It is read
    from the 'youtube-nsig' cache when present; otherwise it is extracted
    from the downloaded player JS and written back to that cache.

    Note that on a cache hit the returned interpreter is constructed from
    the cached value rather than from the full player source.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name may contain `$`, which is a regex metacharacter, so it must
    # be escaped before being embedded in the pattern.
    func_code = self._search_regex(
        rf'''(?xs){re.escape(func_name)}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''',
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to the same (argument list, body) shape jsinterp returns
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3164
3165 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 3166 func = jsi.extract_function_from_code(*func_code)
f6ca640b 3167
580ce007 3168 def extract_nsig(s):
25836db6 3169 try:
3170 ret = func([s])
3171 except JSInterpreter.Exception:
3172 raise
3173 except Exception as e:
3174 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3175
f6ca640b 3176 if ret.startswith('enhanced_except_'):
25836db6 3177 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 3178 return ret
580ce007 3179
3180 return extract_nsig
e0df6211 3181
109dd3b2 3182 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3183 """
3184 Extract signatureTimestamp (sts)
3185 Required to tell API what sig/player version is in use.
3186 """
3187 sts = None
3188 if isinstance(ytcfg, dict):
3189 sts = int_or_none(ytcfg.get('STS'))
3190
3191 if not sts:
3192 # Attempt to extract from player
3193 if player_url is None:
3194 error_msg = 'Cannot extract signature timestamp without player_url.'
3195 if fatal:
3196 raise ExtractorError(error_msg)
3197 self.report_warning(error_msg)
3198 return
404f611f 3199 code = self._load_player(video_id, player_url, fatal=fatal)
3200 if code:
109dd3b2 3201 sts = int_or_none(self._search_regex(
3202 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3203 'JS player signature timestamp', group='sts', fatal=fatal))
3204 return sts
3205
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so the video is marked as watched.

    Two URLs from the player responses are requested in turn: the playback
    URL, then the watchtime URL ("fully watched"). If either URL is
    missing, a warning is issued and the remaining steps are skipped.
    Download failures are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        tracking_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'),
            expected_type=url_or_none)
        if not tracking_url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        url_parts = urllib.parse.urlparse(tracking_url)
        query = urllib.parse.parse_qs(url_parts.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((query.get('len') or ['1.5'])[0]) - 1)]

        overrides = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            overrides['st'] = 0
            overrides['et'] = video_length
        query.update(overrides)

        encoded_query = urllib.parse.urlencode(query, True)
        final_url = urllib.parse.urlunparse(url_parts._replace(query=encoded_query))

        self._download_webpage(
            final_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3246
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by a generic webpage.

    An Invidious-style ``<link rel="alternate">`` pointing at a YouTube
    watch URL is yielded first and then stops extraction via
    ``StopExtraction``. Otherwise the parent implementation's results are
    yielded, followed by lazyYT embeds and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for lazy_id in lazyyt_ids:
        yield cls.url_result(unescapeHTML(lazy_id), cls, lazy_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # findall returns one tuple per match; the video id is the last group
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 3270
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for ``url``; raise ExtractorError if none is found."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3277
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar in ``data``.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText``; the chapter list is then built by
    ``_extract_chapters_helper``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3292
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-marker lists of the engagement panels.

    Each marker list is tried in order and the first non-empty chapter list
    is returned; an empty list is returned when none yields chapters.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    for contents in marker_lists:
        chapters = self._extract_chapters_helper(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3305
def _extract_heatmap(self, data):
    """Return heatmap markers (start_time, end_time, value) from ``data``, or None.

    Only mutations whose marker list has type 'MARKER_TYPE_HEATMAP' are
    considered. Times are converted from milliseconds to seconds.
    """
    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap_path = (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields,
    )
    heatmap = traverse_obj(data, heatmap_path)
    return heatmap if heatmap else None
5caf30db 3315
8e15177b
JK
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads (new comment format).

    Returns None when no comment id can be found. ``is_favorited`` is None
    when no toolbar-state payload is present. The timestamp is derived from
    the relative published-time text.
    """
    comment_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return

    toolbar_state = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_payload, ('properties', 'publishedTime', {str})) or ''

    details = traverse_obj(comment_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False)

    is_favorited = None
    if toolbar_state is not None:
        is_favorited = toolbar_state.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    comment = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    comment.update(details)
    comment['is_favorited'] = is_favorited
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    comment['_time_text'] = time_text
    comment['timestamp'] = self._parse_time_text(time_text)
    return comment
3344
3345 def _extract_comment_old(self, comment_renderer, parent=None):
a1c5d2ca
M
3346 comment_id = comment_renderer.get('commentId')
3347 if not comment_id:
3348 return
fe93e2c4 3349
c35448b7 3350 info = {
3351 'id': comment_id,
3352 'text': self._get_text(comment_renderer, 'contentText'),
3353 'like_count': self._get_count(comment_renderer, 'voteCount'),
3354 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
3355 'author': self._get_text(comment_renderer, 'authorText'),
3356 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
3357 'parent': parent or 'root',
3358 }
fe93e2c4 3359
c26f9b99 3360 # Timestamp is an estimate calculated from the current time and time_text
3361 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3362 timestamp = self._parse_time_text(time_text)
3363
c35448b7 3364 info.update({
3365 # FIXME: non-standard, but we need a way of showing that it is an estimate.
3366 '_time_text': time_text,
3367 'timestamp': timestamp,
3368 })
fe93e2c4 3369
c35448b7 3370 info['author_url'] = urljoin(
3371 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
3372 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
3373 expected_type=str, get_all=False))
a1c5d2ca 3374
c35448b7 3375 author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
3376 if author_is_uploader is not None:
3377 info['author_is_uploader'] = author_is_uploader
3378
3379 comment_abr = traverse_obj(
89bed013 3380 comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
c35448b7 3381 if comment_abr is not None:
3382 info['is_favorited'] = 'creatorHeart' in comment_abr
3383
14a14335 3384 badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
3385 if self._has_badge(badges, BadgeType.VERIFIED):
3386 info['author_is_verified'] = True
c35448b7 3387
3388 is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
3389 if is_pinned:
3390 info['is_pinned'] = True
3391
3392 return info
a1c5d2ca 3393
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    Called without ``parent`` for the top-level comment section, and
    recursively (from ``extract_thread``) with ``parent`` set to a comment
    id to fetch that comment's replies. ``tracker`` is a dict of running
    counters and seen/pinned id sets shared across the recursive calls; it
    is created on the first call.

    Limits come from the 'max_comments' extractor argument, read here as
    (max_comments, max_parents, max_replies, max_replies_per_thread) with
    missing values defaulting to ``sys.maxsize``.

    Raises ``self.CommentsDisabled`` when the root data carries a message
    and no top-level comment was extracted.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the comments header: records the estimated total in the tracker
        # and returns the continuation for the requested sort order (or None).
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comment dicts for one page of threads/replies. A bare `yield`
        # (None) is a stop sentinel: the consumer below returns on a falsy entry.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                # Select the entity payloads whose key belongs to this comment
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Pad the argument list with '' so that missing limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        # Reset; the loop ends unless this page provides a further continuation
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response is only used for its header (count and sort continuation)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                if not entry:
                    # Stop sentinel from extract_thread (limit reached or looping detected)
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3596
3597 @staticmethod
3598 def _generate_comment_continuation(video_id):
3599 """
3600 Generates initial comment section continuation token from given video id
3601 """
3602 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3603 return base64.b64encode(token.encode()).decode()
3604
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction.

    Returns an iterator over the comments of the 'comment-item-section'
    item section in ``contents``, truncated to the first value of the
    'max_comments' extractor argument when one is given.
    """
    def comment_stream(section_contents):
        for section in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        else:
            renderer = None
        yield from self._comment_entries(renderer, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(comment_stream(contents), 0, comment_limit)
a1c5d2ca 3615
109dd3b2 3616 @staticmethod
99e9e001 3617 def _get_checkok_params():
3618 return {'contentCheckOk': True, 'racyCheckOk': True}
3619
3620 @classmethod
3621 def _generate_player_context(cls, sts=None):
109dd3b2 3622 context = {
3623 'html5Preference': 'HTML5_PREF_WANTS',
3624 }
3625 if sts is not None:
3626 context['signatureTimestamp'] = sts
3627 return {
3628 'playbackContext': {
add96eb9 3629 'contentPlaybackContext': context,
a1a7907b 3630 },
add96eb9 3631 **cls._get_checkok_params(),
109dd3b2 3632 }
3633
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates an age-gated video.

    A 'desktopLegacyAgeGateReason' is taken as age-gated outright.
    Otherwise the playability 'status' and 'reason' values are searched for
    known age-gate markers. Matching is case-insensitive: the status
    markers below are lowercase, while status values are compared in
    uppercase elsewhere in this file (e.g. 'UNPLAYABLE').
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Skip non-string values so the substring test cannot raise
    lowered_reasons = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered_reasons)
3645
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'."""
    playability_status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability_status == 'UNPLAYABLE'
9275f62c 3649
50ac0e54 3650 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
109dd3b2 3651
11f9be09 3652 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
3653 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 3654 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 3655 headers = self.generate_api_headers(
99e9e001 3656 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 3657
6e634cbe 3658 yt_query = {
3659 'videoId': video_id,
6e634cbe 3660 }
ba06d77a 3661
546b2c28 3662 pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
ba06d77a 3663 if pp_arg:
3664 yt_query['params'] = pp_arg
50ac0e54 3665
11f9be09 3666 yt_query.update(self._generate_player_context(sts))
3667 return self._extract_response(
3668 item_id=video_id, ep='player', query=yt_query,
379e44ed 3669 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 3670 default_client=client,
add96eb9 3671 note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
11f9be09 3672 ) or None
3673
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of Innertube clients to query.

    Clients come from the 'player_client' extractor argument: 'default'
    expands to the default clients and 'all' to every public client
    (sorted by descending priority). Unsupported names are skipped with a
    warning. Android clients are always moved to the end. For music URLs,
    the ``*_music`` variant of each requested client is appended when it
    exists.
    """
    requested_clients = []
    android_clients = []
    default = ['ios', 'web']
    # Client names starting with '_' are not selectable
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client.startswith('android'):
            android_clients.append(client)
        else:
            requested_clients.append(client)
    # Force deprioritization of broken Android clients for format de-duplication
    requested_clients.extend(android_clients)
    if not requested_clients:
        # Copy so the defaults list is not mutated by the extend below
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first rather than extending the list from a
        # generator that is still iterating over that same list.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3702
5eedc208
SS
def _invalid_player_response(self, pr, video_id):
    """Return the video id carried by ``pr`` if it differs from ``video_id``, else None."""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    returned_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return returned_id if returned_id != video_id else None
3708
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each requested client.

    Clients are processed in the order given. While processing, fallback
    clients (`*_creator`, `*_embedded`, `tv_embedded.*`) may be queued when
    a response is unplayable or age-gated.

    Returns a `(player_responses, player_url)` tuple.
    Raises ExtractorError when no usable player response was obtained.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage,
            'initial player response', video_id, fatal=False)

    responses = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        responses.append({**initial_pr, 'streamingData': None})

    known_clients = set(clients)
    # Used as a stack: reversed so that pop() hands out clients in request order
    pending = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in known_clients:
                pending.append(candidate)
                known_clients.add(actual_client)
                return

    tried_iframe_fallback = False
    player_url = None
    skipped_clients = {}
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())

        # Only the web client can reuse the ytcfg taken from the watch page
        if client == 'web':
            player_ytcfg = master_ytcfg
        else:
            player_ytcfg = {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per extraction
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        wrong_video_id = self._invalid_player_response(pr, video_id)
        if wrong_video_id:
            skipped_clients[client] = wrong_video_id
        elif pr:
            # Save client name for introspection later
            short_name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = short_name
            for stream in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                stream[STREAMING_DATA_CLIENT_NAME] = short_name
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if not skipped_clients:
        if not responses:
            raise ExtractorError('Failed to extract any player response')
        return responses, player_url

    self.report_warning(
        f'Skipping player responses from {"/".join(skipped_clients)} clients '
        f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
    if not responses:
        raise ExtractorError(
            'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    return responses, player_url
11f9be09 3792
4d37720a
L
3793 def _needs_live_processing(self, live_status, duration):
3794 if (live_status == 'is_live' and self.get_param('live_from_start')
d949c10c 3795 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
4d37720a
L
3796 return live_status
3797
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the format dicts of a video, followed by its subtitles.

    This is a generator. It first yields one dict per usable entry of
    `formats`/`adaptiveFormats` found in `streaming_data`, then the formats
    parsed from each HLS and DASH manifest (unless skipped), and as the
    very last item the merged subtitles dict from those manifests. Callers
    are expected to split the last item off the sequence.

    `player_url` is needed to decrypt signatures and `n` parameters;
    formats that cannot be decrypted are skipped with a warning.
    `live_status` and `duration` decide which manifests are skipped and
    which formats are flagged as damaged.
    """
    CHUNK_SIZE = 10 << 20
    PREFERRED_LANG_VALUE = 10
    original_language = None
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # Lazily split the file into CHUNK_SIZE byte ranges requested via the `range` query
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
            }),
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value, not just a missing key
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality

        is_default = audio_track.get('audioIsDefault')
        is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
        language_code = (audio_track.get('id') or '').split('.')[0]
        if language_code and is_default:
            original_language = language_code

        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&{}={}'.format(
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url),
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url),
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: Some formats may be missing',
                        video_id=video_id, only_once=True)
                continue

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # Android client formats are broken due to integrity check enforcement
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
        is_broken = client_name and client_name.startswith(short_client_name('android'))
        if is_broken:
            self.report_warning(
                f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
                'They will be deprioritized', only_once=True)

        # `quality` is None when the format carries no quality information at all
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
            'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
            # Strictly de-prioritize broken, damaged and 3gp formats
            'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Adjust a manifest format in place; returns False when it duplicates
        # an already seen (protocol, language) pair for the same itag
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if original_language and f.get('language') == original_language:
            f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
            f['language_preference'] = PREFERRED_LANG_VALUE

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item of the generated sequence
    yield subtitles
11f9be09 4062
def _extract_storyboard(self, player_responses, duration):
    """Generate one `mhtml` storyboard format per level of the storyboard spec.

    The spec string is `|`-separated: its first element is the base URL and
    each following element describes one storyboard level as eight
    `#`-separated fields. The first five are width, height, frame count,
    columns and rows; the last two fill the `$N` placeholder and the `sigh`
    query parameter of the URL.

    Nothing is generated when there is no valid base URL, or when
    `duration` is unknown (None or 0), since the per-fragment durations and
    the fps cannot be computed without it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoid TypeError/ZeroDivisionError in the divisions below
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one sheet of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
4100
adbc4ec4 4101 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
b6de707d 4102 webpage = None
4103 if 'webpage' not in self._configuration_arg('player_skip'):
50ac0e54 4104 query = {'bpctr': '9999999999', 'has_verified': '1'}
546b2c28 4105 pp = self._configuration_arg('player_params', [None], casesense=True)[0]
ba06d77a 4106 if pp:
4107 query['pp'] = pp
b6de707d 4108 webpage = self._download_webpage(
50ac0e54 4109 webpage_url, video_id, fatal=False, query=query)
11f9be09 4110
4111 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 4112
b6de707d 4113 player_responses, player_url = self._extract_player_responses(
11f9be09 4114 self._get_requested_clients(url, smuggled_data),
50ac0e54 4115 video_id, webpage, master_ytcfg, smuggled_data)
11f9be09 4116
adbc4ec4
THD
4117 return webpage, master_ytcfg, player_responses, player_url
4118
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status of the video and extract its formats.

    Returns a `(live_broadcast_details, live_status, streaming_data,
    formats, subtitles)` tuple, where `live_status` is one of `post_live`,
    `is_live`, `is_upcoming`, `was_live`, `not_live` or None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format and, as its last item, the subtitles
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    if not any(not fmt.get('has_drm') for fmt in formats):
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
4141
4142 def _real_extract(self, url):
4143 url, smuggled_data = unsmuggle_url(url, {})
4144 video_id = self._match_id(url)
4145
4146 base_url = self.http_scheme() + '//www.youtube.com/'
4147 webpage_url = base_url + 'watch?v=' + video_id
4148
4149 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4150
11f9be09 4151 playability_statuses = traverse_obj(
6839ae1f 4152 player_responses, (..., 'playabilityStatus'), expected_type=dict)
11f9be09 4153
4154 trailer_video_id = get_first(
4155 playability_statuses,
4156 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4157 expected_type=str)
4158 if trailer_video_id:
4159 return self.url_result(
4160 trailer_video_id, self.ie_key(), trailer_video_id)
4161
4162 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4163 if webpage else (lambda x: None))
4164
6839ae1f 4165 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
11f9be09 4166 microformats = traverse_obj(
4167 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 4168 expected_type=dict)
c26f9b99 4169
4170 translated_title = self._get_text(microformats, (..., 'title'))
4171 video_title = (self._preferred_lang and translated_title
4172 or get_first(video_details, 'title') # primary
4173 or translated_title
4174 or search_meta(['og:title', 'twitter:title', 'title']))
4175 translated_description = self._get_text(microformats, (..., 'description'))
4176 original_description = get_first(video_details, 'shortDescription')
4177 video_description = (
4178 self._preferred_lang and translated_description
4179 # If original description is blank, it will be an empty string.
4180 # Do not prefer translated description in this case.
4181 or original_description if original_description is not None else translated_description)
11f9be09 4182
d89257f3 4183 multifeed_metadata_list = get_first(
4184 player_responses,
4185 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4186 expected_type=str)
4187 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4188 if self.get_param('noplaylist'):
add96eb9 4189 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
d89257f3 4190 else:
4191 entries = []
4192 feed_ids = []
4193 for feed in multifeed_metadata_list.split(','):
4194 # Unquote should take place before split on comma (,) since textual
4195 # fields may contain comma as well (see
4196 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 4197 feed_data = urllib.parse.parse_qs(
ac668111 4198 urllib.parse.unquote_plus(feed))
d89257f3 4199
4200 def feed_entry(name):
4201 return try_get(
14f25df2 4202 feed_data, lambda x: x[name][0], str)
d89257f3 4203
4204 feed_id = feed_entry('id')
4205 if not feed_id:
4206 continue
4207 feed_title = feed_entry('title')
4208 title = video_title
4209 if feed_title:
add96eb9 4210 title += f' ({feed_title})'
d89257f3 4211 entries.append({
4212 '_type': 'url_transparent',
4213 'ie_key': 'Youtube',
4214 'url': smuggle_url(
add96eb9 4215 '{}watch?v={}'.format(base_url, feed_data['id'][0]),
d89257f3 4216 {'force_singlefeed': True}),
4217 'title': title,
4218 })
4219 feed_ids.append(feed_id)
4220 self.to_screen(
add96eb9 4221 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
4222 ', '.join(feed_ids), video_id))
d89257f3 4223 return self.playlist_result(
4224 entries, video_id, video_title, video_description)
11f9be09 4225
9da6612b 4226 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4227 or int_or_none(get_first(microformats, 'lengthSeconds'))
4228 or parse_duration(search_meta('duration')) or None)
a1b2d843 4229
4d37720a
L
4230 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4231 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4232 if live_status == 'post_live':
4233 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
bf1317d2 4234
545cc85d 4235 if not formats:
11f9be09 4236 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 4237 self.report_drm(video_id)
11f9be09 4238 pemr = get_first(
4239 playability_statuses,
4240 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4241 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4242 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 4243 if subreason:
545cc85d 4244 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 4245 countries = get_first(microformats, 'availableCountries')
545cc85d 4246 if not countries:
4247 regions_allowed = search_meta('regionsAllowed')
4248 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 4249 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 4250 reason += f'. {subreason}'
545cc85d 4251 if reason:
b7da73eb 4252 self.raise_no_formats(reason, expected=True)
bf1317d2 4253
11f9be09 4254 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 4255 if not keywords and webpage:
4256 keywords = [
4257 unescapeHTML(m.group('content'))
4258 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4259 for keyword in keywords:
4260 if keyword.startswith('yt:stretch='):
201c1459 4261 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4262 if mobj:
4263 # NB: float is intentional for forcing float division
4264 w, h = (float(v) for v in mobj.groups())
4265 if w > 0 and h > 0:
4266 ratio = w / h
4267 for f in formats:
4268 if f.get('vcodec') != 'none':
4269 f['stretched_ratio'] = ratio
4270 break
a709d873 4271 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 4272 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4273 if thumbnail_url:
4274 thumbnails.append({
4275 'url': thumbnail_url,
ff2751ac 4276 })
fccf5021 4277 original_thumbnails = thumbnails.copy()
4278
0ba692ac 4279 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 4280 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 4281 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 4282 thumbnail_names = [
962ffcf8 4283 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 4284 # in resolution, these are not the custom thumbnail. So de-prioritize them
4285 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
add96eb9 4286 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
cca80fe6 4287 ]
cca80fe6 4288 n_thumbnail_names = len(thumbnail_names)
0ba692ac 4289 thumbnails.extend({
4290 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4291 video_id=video_id, name=name, ext=ext,
4d37720a 4292 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
cca80fe6 4293 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 4294 for thumb in thumbnails:
cca80fe6 4295 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 4296 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 4297 self._remove_duplicate_formats(thumbnails)
fccf5021 4298 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 4299
7ea65411 4300 category = get_first(microformats, 'category') or search_meta('genre')
7666b936 4301 channel_id = self.ucid_or_none(str_or_none(
7ea65411 4302 get_first(video_details, 'channelId')
4303 or get_first(microformats, 'externalChannelId')
7666b936 4304 or search_meta('channelId')))
7ea65411 4305 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4306
adbc4ec4
THD
4307 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4308 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4309 if not duration and live_end_time and live_start_time:
4310 duration = live_end_time - live_start_time
4311
4d37720a
L
4312 needs_live_processing = self._needs_live_processing(live_status, duration)
4313
4314 def is_bad_format(fmt):
4315 if needs_live_processing and not fmt.get('is_from_start'):
4316 return True
4317 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4318 and fmt.get('protocol') == 'http_dash_segments'):
4319 return True
4320
4321 for fmt in filter(is_bad_format, formats):
4322 fmt['preference'] = (fmt.get('preference') or -1) - 10
d949c10c 4323 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
4d37720a
L
4324
4325 if needs_live_processing:
4326 self._prepare_live_from_start_formats(
4327 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
7ea65411 4328
720c3099 4329 formats.extend(self._extract_storyboard(player_responses, duration))
4330
7666b936 4331 channel_handle = self.handle_from_url(owner_profile_url)
4332
545cc85d 4333 info = {
4334 'id': video_id,
39ca3b5c 4335 'title': video_title,
545cc85d 4336 'formats': formats,
4337 'thumbnails': thumbnails,
fccf5021 4338 # The best thumbnail that we are sure exists. Prevents unnecessary
4339 # URL checking if user don't care about getting the best possible thumbnail
4340 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 4341 'description': video_description,
545cc85d 4342 'channel_id': channel_id,
7666b936 4343 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
545cc85d 4344 'duration': duration,
4345 'view_count': int_or_none(
11f9be09 4346 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 4347 or search_meta('interactionCount')),
11f9be09 4348 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 4349 'age_limit': 18 if (
11f9be09 4350 get_first(microformats, 'isFamilySafe') is False
545cc85d 4351 or search_meta('isFamilyFriendly') == 'false'
4352 or search_meta('og:restrictions:age') == '18+') else 0,
4353 'webpage_url': webpage_url,
4354 'categories': [category] if category else None,
4355 'tags': keywords,
11f9be09 4356 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4d37720a 4357 'live_status': live_status,
adbc4ec4 4358 'release_timestamp': live_start_time,
800ec085 4359 '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
add96eb9 4360 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
545cc85d 4361 }
b477fc13 4362
c646d76f 4363 subtitles = {}
3944e7af 4364 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 4365 if pctr:
ecdc9049 4366 def get_lang_code(track):
4367 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4368 or track.get('languageCode'))
4369
4370 # Converted into dicts to remove duplicates
4371 captions = {
4372 get_lang_code(sub): sub
6839ae1f 4373 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
ecdc9049 4374 translation_languages = {
4375 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
6839ae1f 4376 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
ecdc9049 4377
774d79cc 4378 def process_language(container, base_url, lang_code, sub_name, query):
120916da 4379 lang_subs = container.setdefault(lang_code, [])
545cc85d 4380 for fmt in self._SUBTITLE_FORMATS:
4381 query.update({
4382 'fmt': fmt,
4383 })
4384 lang_subs.append({
4385 'ext': fmt,
60f393e4 4386 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 4387 'name': sub_name,
545cc85d 4388 })
7e72694b 4389
07b47084 4390 # NB: Constructing the full subtitle dictionary is slow
4391 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4392 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 4393 for lang_code, caption_track in captions.items():
4394 base_url = caption_track.get('baseUrl')
1235d333 4395 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 4396 if not base_url:
4397 continue
ecdc9049 4398 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 4399 if caption_track.get('kind') != 'asr':
545cc85d 4400 if not lang_code:
4401 continue
4402 process_language(
ecdc9049 4403 subtitles, base_url, lang_code, lang_name, {})
4404 if not caption_track.get('isTranslatable'):
4405 continue
3944e7af 4406 for trans_code, trans_name in translation_languages.items():
4407 if not trans_code:
545cc85d 4408 continue
1235d333 4409 orig_trans_code = trans_code
71eb82d1 4410 if caption_track.get('kind') != 'asr' and trans_code != 'und':
07b47084 4411 if not get_translated_subs:
18e49408 4412 continue
ecdc9049 4413 trans_code += f'-{lang_code}'
a70635b8 4414 trans_name += format_field(lang_name, None, ' from %s')
1235d333 4415 if lang_code == f'a-{orig_trans_code}':
ff9b0e07 4416 # Set audio language based on original subtitles
4417 for f in formats:
4418 if f.get('acodec') != 'none' and not f.get('language'):
4419 f['language'] = orig_trans_code
4420 # Add an "-orig" label to the original language so that it can be distinguished.
4421 # The subs are returned without "-orig" as well for compatibility
0c8d9e5f 4422 process_language(
d49669ac 4423 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4424 # Setting tlang=lang returns damaged subtitles.
d49669ac 4425 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 4426 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 4427
4428 info['automatic_captions'] = automatic_captions
4429 info['subtitles'] = subtitles
7e72694b 4430
14f25df2 4431 parsed_url = urllib.parse.urlparse(url)
545cc85d 4432 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 4433 query = urllib.parse.parse_qs(component)
545cc85d 4434 for k, v in query.items():
4435 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4436 d_k += '_time'
4437 if d_k not in info and k in s_ks:
add96eb9 4438 info[d_k] = parse_duration(v[0])
822b9d9c
RA
4439
4440 # Youtube Music Auto-generated description
71dc18fa
BT
4441 if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
4442 # XXX: Causes catastrophic backtracking if description has "·"
4443 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4444 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4445 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
1890fc63 4446 mobj = re.search(
4447 r'''(?xs)
71dc18fa
BT
4448 (?=(?P<track>[^\n·]+))(?P=track)·
4449 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4450 (?=(?P<album>[^\n]+))(?P=album)\n
1890fc63 4451 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4452 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
71dc18fa
BT
4453 (.+?\nArtist\s*:\s*
4454 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4455 )?.+\nAuto-generated\ by\ YouTube\.\s*$
1890fc63 4456 ''', video_description)
822b9d9c 4457 if mobj:
822b9d9c
RA
4458 release_year = mobj.group('release_year')
4459 release_date = mobj.group('release_date')
4460 if release_date:
4461 release_date = release_date.replace('-', '')
4462 if not release_year:
545cc85d 4463 release_year = release_date[:4]
4464 info.update({
4465 'album': mobj.group('album'.strip()),
104a7b5a
L
4466 'artists': ([a] if (a := mobj.group('clean_artist'))
4467 else [a.strip() for a in mobj.group('artist').split('·')]),
545cc85d 4468 'track': mobj.group('track').strip(),
4469 'release_date': release_date,
cc2db878 4470 'release_year': int_or_none(release_year),
545cc85d 4471 })
7e72694b 4472
545cc85d 4473 initial_data = None
4474 if webpage:
56ba69e4 4475 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
607510b9 4476 if not traverse_obj(initial_data, 'contents'):
4477 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4478 initial_data = None
545cc85d 4479 if not initial_data:
99e9e001 4480 query = {'videoId': video_id}
4481 query.update(self._get_checkok_params())
109dd3b2 4482 initial_data = self._extract_response(
4483 item_id=video_id, ep='next', fatal=False,
607510b9 4484 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
99e9e001 4485 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4486 note='Downloading initial data API JSON')
545cc85d 4487
0df111a3 4488 info['comment_count'] = traverse_obj(initial_data, (
4489 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
add96eb9 4490 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
0df111a3 4491 ), (
4492 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
add96eb9 4493 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
071670cb 4494 ), expected_type=self._get_count, get_all=False)
0df111a3 4495
19a03940 4496 try: # This will error if there is no livechat
c60ee3a2 4497 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4498 except (KeyError, IndexError, TypeError):
4499 pass
4500 else:
ecdc9049 4501 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4502 # url is needed to set cookies
4503 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4504 'video_id': video_id,
4505 'ext': 'json',
4d37720a
L
4506 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4507 else 'youtube_live_chat_replay'),
c60ee3a2 4508 }]
545cc85d 4509
4510 if initial_data:
7c365c21 4511 info['chapters'] = (
4512 self._extract_chapters_from_json(initial_data, duration)
4513 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4514 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4515 or None)
545cc85d 4516
03e85ea9 4517 info['heatmap'] = self._extract_heatmap(initial_data)
5caf30db 4518
17322130 4519 contents = traverse_obj(
4520 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4521 expected_type=list, default=[])
4522
4523 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4524 if vpir:
4525 stl = vpir.get('superTitleLink')
4526 if stl:
4527 stl = self._get_text(stl)
4528 if try_get(
4529 vpir,
4530 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4531 info['location'] = stl
4532 else:
affc4fef 4533 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4534 if mobj:
545cc85d 4535 info.update({
17322130 4536 'series': mobj.group(1),
4537 'season_number': int(mobj.group(2)),
4538 'episode_number': int(mobj.group(3)),
545cc85d 4539 })
17322130 4540 for tlb in (try_get(
4541 vpir,
4542 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4543 list) or []):
3ffb2f5b 4544 tbrs = variadic(
4545 traverse_obj(
6839ae1f
SS
4546 tlb, ('toggleButtonRenderer', ...),
4547 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
3ffb2f5b 4548 for tbr in tbrs:
4549 for getter, regex in [(
4550 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4551 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4552 lambda x: x['accessibility'],
4553 lambda x: x['accessibilityData']['accessibilityData'],
4554 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4555 label = (try_get(tbr, getter, dict) or {}).get('label')
4556 if label:
4557 mobj = re.match(regex, label)
4558 if mobj:
4559 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4560 break
6b5d93b0
PG
4561
4562 info['like_count'] = traverse_obj(vpir, (
4563 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4564 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4565 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4566 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
4567
867c66ff
M
4568 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4569 if vcr:
4570 vc = self._get_count(vcr, 'viewCount')
4571 # Upcoming premieres with waiting count are treated as live here
4572 if vcr.get('isLive'):
4573 info['concurrent_view_count'] = vc
4574 elif info.get('view_count') is None:
4575 info['view_count'] = vc
4576
17322130 4577 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4578 if vsir:
4579 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4580 info.update({
4581 'channel': self._get_text(vor, 'title'),
4582 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4583
7666b936 4584 if not channel_handle:
4585 channel_handle = self.handle_from_url(
4586 traverse_obj(vor, (
4587 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4588 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4589 {str}), get_all=False))
4590
17322130 4591 rows = try_get(
4592 vsir,
4593 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4594 list) or []
4595 multiple_songs = False
4596 for row in rows:
4597 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4598 multiple_songs = True
4599 break
4600 for row in rows:
4601 mrr = row.get('metadataRowRenderer') or {}
4602 mrr_title = mrr.get('title')
4603 if not mrr_title:
4604 continue
4605 mrr_title = self._get_text(mrr, 'title')
4606 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4607 if mrr_title == 'License':
4608 info['license'] = mrr_contents_text
4609 elif not multiple_songs:
4610 if mrr_title == 'Album':
4611 info['album'] = mrr_contents_text
4612 elif mrr_title == 'Artist':
104a7b5a 4613 info['artists'] = [mrr_contents_text] if mrr_contents_text else None
17322130 4614 elif mrr_title == 'Song':
4615 info['track'] = mrr_contents_text
8213ce28 4616 owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
4617 if self._has_badge(owner_badges, BadgeType.VERIFIED):
4618 info['channel_is_verified'] = True
545cc85d 4619
7666b936 4620 info.update({
4621 'uploader': info.get('channel'),
4622 'uploader_id': channel_handle,
4623 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4624 })
96a134de 4625
4626 # We only want timestamp IF it has time precision AND a timezone
4627 # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
4628 timestamp = (
4629 parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
4630 or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
4631 )
4632 upload_date = (
4633 dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
4634 (
4635 unified_strdate(get_first(microformats, 'uploadDate'))
4636 or unified_strdate(search_meta('uploadDate'))
4637 ))
4638
4639 # In the case we cannot get the timestamp:
17322130 4640 # The upload date for scheduled, live and past live streams / premieres in microformats
4641 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4642 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
96a134de 4643 if not upload_date or (not timestamp and live_status in ('not_live', None)):
4644 # this should be in UTC, as configured in the cookie/client context
c26f9b99 4645 upload_date = strftime_or_none(
ad54c913 4646 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
96a134de 4647
17322130 4648 info['upload_date'] = upload_date
96a134de 4649 info['timestamp'] = timestamp
992f9a73 4650
ef79d20d 4651 if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
4652 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
c305a25c 4653 upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
bb5a54e6 4654 if upload_datetime >= datetime_from_str('today-2days'):
ef79d20d 4655 for fmt in info['formats']:
4656 if fmt.get('protocol') == 'm3u8_native':
4657 fmt['__needs_testing'] = True
4658
104a7b5a 4659 for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
545cc85d 4660 v = info.get(s_k)
4661 if v:
4662 info[d_k] = v
b84071c0 4663
14a14335 4664 badges = self._extract_badges(traverse_obj(vpir, 'badges'))
c26f9b99 4665
4666 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4667 or get_first(video_details, 'isPrivate', expected_type=bool))
4668
4669 info['availability'] = (
4670 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4671 else self._availability(
4672 is_private=is_private,
4673 needs_premium=(
4674 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4675 or False if initial_data and is_private is not None else None),
4676 needs_subscription=(
4677 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4678 or False if initial_data and is_private is not None else None),
4679 needs_auth=info['age_limit'] >= 18,
4680 is_unlisted=None if is_private is None else (
4681 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4682 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4683
a2160aa4 4684 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4685
11f9be09 4686 self.mark_watched(video_id, player_responses)
d77ab8e2 4687
545cc85d 4688 return info
c5e8d7af 4689
a61fd4cf 4690
a6213a49 4691class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled data to `func` and re-attaches it to the results.

    The decorated function is called as `func(self, url, smuggled_data)` with
    the data unsmuggled from `url`. Whatever is left in `smuggled_data` after
    the call is smuggled back into the URL of the returned info dict and of
    each of its `entries` (only for `url`/`url_transparent` results).
    """

    def _smuggle(info, smuggled_data):
        # Only URL-type results carry a URL that the data can be attached to
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                    # The music flag is expressed through the hostname instead of being smuggled
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Lazily processed; each entry gets its own copy since _smuggle() mutates the dict it is given
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result
    return wrapper
4718
8bdd16b4 4719 @staticmethod
cd7c66cf 4720 def _extract_basic_item_renderer(item):
4721 # Modified from _extract_grid_item_renderer
201c1459 4722 known_basic_renderers = (
add96eb9 4723 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
cd7c66cf 4724 )
4725 for key, renderer in item.items():
201c1459 4726 if not isinstance(renderer, dict):
cd7c66cf 4727 continue
201c1459 4728 elif key in known_basic_renderers:
4729 return renderer
4730 elif key.startswith('grid') and key.endswith('Renderer'):
4731 return renderer
8bdd16b4 4732
def _extract_channel_renderer(self, renderer):
    """Build a `url`-type result dict describing the channel in `renderer`.

    `renderer` must contain a 'channelId' key (KeyError otherwise).
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (
            ('commandMetadata', 'webCommandMetadata', 'url'),
            ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
    channel_handle = (
        self.handle_from_url(endpoint_url)
        or self.handle_or_none(self._get_text(renderer, 'subscriberCountText')))

    ie_key = YoutubeTabIE.ie_key()
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    # Because of the above, search channel renderers carry the subscriber text in videoCountText.
    # In feed/channels, subscriberCountText does hold the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    # videoCountText may be the subscriber count, so it is only trusted
    # when subscriberCountText itself parses as a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')
    description = self._get_text(renderer, 'descriptionSnippet')
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': ie_key,
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': is_verified,
    }
4770
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of `grid_renderer['items']`.

    Each item is resolved to a playlist, video or channel result; failing
    that, its navigation endpoint URL is used if a known extractor accepts it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if playlist_id := renderer.get('playlistId'):
            # playlist
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 4808
def _music_reponsive_list_entry(self, renderer):
    """Return a URL result for a music list item, or None if it references nothing known.

    Checked in order: a playlist item video, a watch endpoint with a playlist
    (optionally with a video), and a browse endpoint.
    """
    if video_id := traverse_obj(renderer, ('playlistItemData', 'videoId')):
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    if playlist_id := traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId')):
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    if browse_id := traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId')):
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4829
3d3dddc9 4830 def _shelf_entries_from_content(self, shelf_renderer):
4831 content = shelf_renderer.get('content')
4832 if not isinstance(content, dict):
8bdd16b4 4833 return
cd7c66cf 4834 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4835 if renderer:
4836 # TODO: add support for nested playlists so each shelf is processed
4837 # as separate playlist
4838 # TODO: this includes only first N items
86e5f3ed 4839 yield from self._grid_entries(renderer)
3d3dddc9 4840 renderer = content.get('horizontalListRenderer')
4841 if renderer:
add96eb9 4842 # TODO: handle case
3d3dddc9 4843 pass
8bdd16b4 4844
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint (if present), then the shelf's own content entries.

    With `skip_channels` set, nothing is yielded for shelves whose URL
    contains '/channels?'.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not contain a shelf URL; fall back to extraction from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4860
8bdd16b4 4861 def _playlist_entries(self, video_list_renderer):
4862 for content in video_list_renderer['contents']:
4863 if not isinstance(content, dict):
4864 continue
4865 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4866 if not isinstance(renderer, dict):
4867 continue
4868 video_id = renderer.get('videoId')
4869 if not video_id:
4870 continue
4871 yield self._extract_video(renderer)
07aeced6 4872
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (a video or a playlist URL result) from the renderer's `content`."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif playlist_id := renderer.get('playlistId'):
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
3462ffa8 4888
8bdd16b4 4889 def _video_entry(self, video_renderer):
4890 video_id = video_renderer.get('videoId')
4891 if video_id:
4892 return self._extract_video(video_renderer)
dacb3a86 4893
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap-command URL, or None if no URL can be built."""
    command_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', command_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4900
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video, the attached playlist, then every inline
    link in the post text that YoutubeIE accepts (the attached video is not
    repeated).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video has already been handled above
        if linked_video_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 4936
8bdd16b4 4937 def _post_thread_continuation_entries(self, post_thread_continuation):
4938 contents = post_thread_continuation.get('contents')
4939 if not isinstance(contents, list):
4940 return
4941 for content in contents:
4942 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4943 if isinstance(renderer, dict):
4944 yield from self._post_thread_entries(renderer)
8bdd16b4 4945 continue
6b0b0a28 4946 renderer = content.get('videoRenderer')
4947 if isinstance(renderer, dict):
4948 yield self._video_entry(renderer)
07aeced6 4949
39ed931e 4950 r''' # unused
4951 def _rich_grid_entries(self, contents):
4952 for content in contents:
4953 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4954 if video_renderer:
4955 entry = self._video_entry(video_renderer)
4956 if entry:
4957 yield entry
4958 '''
52efa4b3 4959
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every endpoint URL found in the report history table rows."""
    endpoint_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for endpoint_url in traverse_obj(renderer, endpoint_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', endpoint_url), YoutubeIE)
4966
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under `parent_renderer['contents']`.

    `continuation_list` is used as an output parameter: it is reset to
    `[None]` once the generator starts running and its single element is
    overwritten with the result of `_extract_continuation()` as renderers are
    processed. Its value is therefore only meaningful after the generator has
    been consumed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of these keys holding a dict; its 'contents' are processed below
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Content without a section/shelf renderer: only these two kinds are handled
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries for it
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                # Recurses with the same continuation_list, which the nested call resets when it starts
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            # Only the first known renderer key of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Some of the callables above may produce None entries
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the section renderer itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
5020
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting continuation pages
    until no continuation is left, a continuation token repeats, a request
    returns nothing, or a page contains no recognised renderer.
    @param tab: tab renderer; its 'content' dict is the starting point
    @param item_id: id used for labelling the page requests
    @param ytcfg, account_syncid, visitor_data: passed to generate_api_headers
    """
    # One-slot holder that self._extract_entries stores the next continuation in.
    # The name is rebound before every continuation page; the helper below looks
    # the name up at call time, so it always hands over the current holder
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    parent_renderer = {}
    for renderer_name in ('sectionListRenderer', 'richGridRenderer'):
        candidate = try_get(tab_content, lambda x: x[renderer_name], dict)
        if candidate:
            parent_renderer = candidate
            break
    yield from extract_entries(parent_renderer)

    # key found in the first continuation item -> (entry generator, wrapper key or None)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }
    response_keys = ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')

    continuation = continuation_list[0]
    seen_continuations = set()
    page_num = 0
    while continuation:
        page_num += 1
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)

        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=response_keys)
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key in known_renderers:
                func, parent_key = known_renderers[key]
                if parent_key:
                    video_items_renderer = {parent_key: continuation_items}
                else:
                    video_items_renderer = continuation_items
                continuation_list = [None]
                yield from func(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # Nothing recognisable on this page - stop paging
        if not video_items_renderer:
            break
9558dcec 5088
8bdd16b4 5089 @staticmethod
7c219ea6 5090 def _extract_selected_tab(tabs, fatal=True):
86973308
M
5091 for tab_renderer in tabs:
5092 if tab_renderer.get('selected'):
5093 return tab_renderer
5094 if fatal:
5095 raise ExtractorError('Unable to find selected tab')
5096
@staticmethod
def _extract_tab_renderers(response):
    """Collect the dict values stored under 'tabRenderer'/'expandableTabRenderer' for each tab of the response."""
    renderer_keys = ('tabRenderer', 'expandableTabRenderer')
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., renderer_keys)
    return traverse_obj(response, tabs_path, expected_type=dict)
b82f815f 5101
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab.
    The playlist title is the metadata title followed by the tab's
    'title' and 'expandedText' (each prefixed with ' - ' when present).
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
39ed931e 5115
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Build the metadata dict for a channel/playlist page.
    @param item_id: fallback id; replaced by the channel id when one is found
                    in the channel metadata renderer
    @param data: response
    @returns dict with 'id', 'title', 'availability', 'channel_follower_count',
             'description', 'tags', 'thumbnails', 'modified_date', 'view_count',
             'playlist_count', 'channel', 'channel_id', 'channel_url', 'uploader',
             'uploader_id', 'uploader_url' and, when the verified badge is present,
             'channel_is_verified'
    """
    info = {'id': item_id}

    # Prefer channel metadata; fall back to playlist metadata when it is absent
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # pageHeaderViewModel slow rollout began April 2024
    page_header_view_model = traverse_obj(data, (
        'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        # Everything from the first '=' onwards is dropped before appending '=s0'
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1,
            })

    # Banners from the header renderers; the pageHeaderViewModel is only consulted
    # when the first lookup yields nothing
    channel_banners = (
        self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
        or self._extract_thumbnails(
            page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5,
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text and finally to the id
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': (
            self._get_count(data, ('header', ..., 'subscriberCountText'))
            or traverse_obj(page_header_view_model, (
                'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
                lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        # Microformat tags take precedence; otherwise the 'keywords' string is split shell-style
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True
    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # view_count: playlist stats -> playlist header -> channel "about" metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel id from the channel metadata: take the owner from the playlist header/sidebar
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # "by X and N others" is reduced to X; any other text is used unchanged
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    # Derived fields are computed last so that they reflect whichever branch set the ids
    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
73c4ac2c 5248
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch page) playlist, requesting the
    'next' endpoint repeatedly to obtain the following part of the playlist.
    Stops when a page has no videos or contains nothing after the last
    video that was already yielded.
    @param playlist: playlist renderer whose entries are read via _playlist_entries
    @param playlist_id: id sent as 'playlistId' and used for labelling requests
    @param data: initial response, used for account syncid/visitor data
    @param ytcfg: ytcfg passed on to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The endpoint may be missing (or not a dict); every field below has
        # its own fallback, so an empty dict is used in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response carried no playlist to read further entries from
            return
cd7c66cf 5279
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Handle a playlist attached to a watch page: either hand over to the
    playlist's own URL or extract the entries inline.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if can_delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
    return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 5302
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response

    Sources consulted: sidebar badges, the playlist header 'privacy' value,
    the selected entry of the privacy dropdown and, for unlisted only, the
    microformat 'unlisted' flag.
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def privacy_matches(header_value, icon_value, fallback=None):
        # The header privacy wins when present, then the dropdown icon, then the fallback
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # The badge check is deliberately kept outside the header/icon fallback chain:
    # written inline as `badge or header == X if header is not None else ...`, the
    # `or` binds tighter than the conditional expression, so the badge would be
    # ignored whenever the header privacy is missing
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or privacy_matches('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or privacy_matches('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 5343
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` value of type `expected_type`
    among the playlist sidebar items, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
5352
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)
    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'}
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5372
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the 'skip' extractor arguments of YoutubeTabIE (cached per instance)."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5376
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.
    @param url: page to download
    @param item_id: id used for the download/extraction helpers
    @param fatal: passed to RetryManager, extract_yt_initial_data and _error_or_warning
    @returns (webpage, data); data is None when it was never extracted or was
             rejected as incomplete
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are retried, except HTTP 403/429 which are reported right away
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            # Alerts are not retried
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            # Discard the incomplete data so it is not returned if retries run out
            data = None
            continue

    return webpage, data
5405
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant for authenticated sessions that ended up without a ytcfg
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
5417
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data and ytcfg for a URL.
    The webpage is tried first unless skip_webpage is set; when that yields
    no data, the data is requested through _extract_tab_endpoint instead.
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5436
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and request
    the endpoint it points to ('browse' for browseEndpoint, 'next' for
    watchEndpoint). Raises ExtractorError when nothing resolves and `fatal`
    is set; otherwise warns and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5454
# Value for the 'params' field of search requests, used by _search_results when
# no explicit `params` argument is passed; a falsy value means the field is omitted
_SEARCH_PARAMS = None
5456
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting the 'search' endpoint page by
    page until no continuation is returned.
    @param query: search query
    @param params: 'params' value of the request; defaults to _SEARCH_PARAMS
    @param default_client: client passed to the ytcfg/header/request helpers
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation of the previous page (if any) into the request
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page_renderer, continuation_list)
        if not continuation_list[0]:
            break
5489
5490
5491class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5492 IE_DESC = 'YouTube Tabs'
5493 _VALID_URL = r'''(?x:
5494 https?://
b032ff0f 5495 (?!consent\.)(?:\w+\.)?
a6213a49 5496 (?:
5497 youtube(?:kids)?\.com|
add96eb9 5498 {invidious}
a6213a49 5499 )/
5500 (?:
5501 (?P<channel_type>channel|c|user|browse)/|
5502 (?P<not_channel>
5503 feed/|hashtag/|
5504 (?:playlist|watch)\?.*?\blist=
5505 )|
add96eb9 5506 (?!(?:{reserved_names})\b) # Direct URLs
a6213a49 5507 )
5508 (?P<id>[^/?\#&]+)
add96eb9 5509 )'''.format(
5510 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5511 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5512 )
a6213a49 5513 IE_NAME = 'youtube:tab'
5514
5515 _TESTS = [{
5516 'note': 'playlists, multipage',
5517 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5518 'playlist_mincount': 94,
5519 'info_dict': {
5520 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5521 'title': 'Igor Kleiner Ph.D. - Playlists',
5522 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5523 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5524 'uploader_id': '@IgorDataScience',
5525 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5526 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5527 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5528 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5529 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
add96eb9 5530 'channel_follower_count': int,
a6213a49 5531 },
5532 }, {
5533 'note': 'playlists, multipage, different order',
5534 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5535 'playlist_mincount': 94,
5536 'info_dict': {
5537 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5538 'title': 'Igor Kleiner Ph.D. - Playlists',
5539 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5540 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5541 'uploader_id': '@IgorDataScience',
5542 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5543 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5544 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5545 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5546 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
add96eb9 5547 'channel_follower_count': int,
a6213a49 5548 },
5549 }, {
5550 'note': 'playlists, series',
5551 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5552 'playlist_mincount': 5,
5553 'info_dict': {
5554 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5555 'title': '3Blue1Brown - Playlists',
8828f457 5556 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5557 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5558 'channel': '3Blue1Brown',
5559 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5560 'uploader_id': '@3blue1brown',
5561 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5562 'uploader': '3Blue1Brown',
976ae3ea 5563 'tags': ['Mathematics'],
14a14335 5564 'channel_follower_count': int,
8213ce28 5565 'channel_is_verified': True,
a6213a49 5566 },
5567 }, {
5568 'note': 'playlists, singlepage',
5569 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5570 'playlist_mincount': 4,
5571 'info_dict': {
5572 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5573 'title': 'ThirstForScience - Playlists',
5574 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5575 'uploader': 'ThirstForScience',
7666b936 5576 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5577 'uploader_id': '@ThirstForScience',
976ae3ea 5578 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
7666b936 5579 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
8828f457 5580 'tags': 'count:12',
976ae3ea 5581 'channel': 'ThirstForScience',
add96eb9 5582 'channel_follower_count': int,
5583 },
a6213a49 5584 }, {
5585 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5586 'only_matching': True,
5587 }, {
5588 'note': 'basic, single video playlist',
5589 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5590 'info_dict': {
a6213a49 5591 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5592 'title': 'youtube-dl public playlist',
976ae3ea 5593 'description': '',
5594 'tags': [],
5595 'view_count': int,
5596 'modified_date': '20201130',
5597 'channel': 'Sergey M.',
5598 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
976ae3ea 5599 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5600 'availability': 'public',
7666b936 5601 'uploader': 'Sergey M.',
5602 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5603 'uploader_id': '@sergeym.6173',
a6213a49 5604 },
5605 'playlist_count': 1,
5606 }, {
5607 'note': 'empty playlist',
5608 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5609 'info_dict': {
a6213a49 5610 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5611 'title': 'youtube-dl empty playlist',
976ae3ea 5612 'tags': [],
5613 'channel': 'Sergey M.',
5614 'description': '',
8828f457 5615 'modified_date': '20230921',
976ae3ea 5616 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5617 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8828f457 5618 'availability': 'unlisted',
7666b936 5619 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5620 'uploader_id': '@sergeym.6173',
5621 'uploader': 'Sergey M.',
a6213a49 5622 },
5623 'playlist_count': 0,
5624 }, {
5625 'note': 'Home tab',
5626 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5627 'info_dict': {
5628 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5629 'title': 'lex will - Home',
5630 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5631 'uploader': 'lex will',
7666b936 5632 'uploader_id': '@lexwill718',
976ae3ea 5633 'channel': 'lex will',
5634 'tags': ['bible', 'history', 'prophesy'],
7666b936 5635 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5636 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5637 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
add96eb9 5638 'channel_follower_count': int,
a6213a49 5639 },
5640 'playlist_mincount': 2,
5641 }, {
5642 'note': 'Videos tab',
5643 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5644 'info_dict': {
5645 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5646 'title': 'lex will - Videos',
5647 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5648 'uploader': 'lex will',
7666b936 5649 'uploader_id': '@lexwill718',
976ae3ea 5650 'tags': ['bible', 'history', 'prophesy'],
5651 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5652 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5653 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5654 'channel': 'lex will',
add96eb9 5655 'channel_follower_count': int,
a6213a49 5656 },
5657 'playlist_mincount': 975,
5658 }, {
5659 'note': 'Videos tab, sorted by popular',
5660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5661 'info_dict': {
5662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5663 'title': 'lex will - Videos',
5664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5665 'uploader': 'lex will',
7666b936 5666 'uploader_id': '@lexwill718',
976ae3ea 5667 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5668 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5669 'channel': 'lex will',
5670 'tags': ['bible', 'history', 'prophesy'],
5671 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
add96eb9 5672 'channel_follower_count': int,
a6213a49 5673 },
5674 'playlist_mincount': 199,
5675 }, {
5676 'note': 'Playlists tab',
5677 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5678 'info_dict': {
5679 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5680 'title': 'lex will - Playlists',
5681 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5682 'uploader': 'lex will',
7666b936 5683 'uploader_id': '@lexwill718',
5684 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5685 'channel': 'lex will',
5686 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5687 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5688 'tags': ['bible', 'history', 'prophesy'],
add96eb9 5689 'channel_follower_count': int,
a6213a49 5690 },
5691 'playlist_mincount': 17,
5692 }, {
5693 'note': 'Community tab',
5694 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5695 'info_dict': {
5696 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5697 'title': 'lex will - Community',
5698 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5699 'channel': 'lex will',
5700 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5701 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5702 'tags': ['bible', 'history', 'prophesy'],
7666b936 5703 'channel_follower_count': int,
5704 'uploader_url': 'https://www.youtube.com/@lexwill718',
5705 'uploader_id': '@lexwill718',
5706 'uploader': 'lex will',
a6213a49 5707 },
5708 'playlist_mincount': 18,
5709 }, {
5710 'note': 'Channels tab',
5711 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5712 'info_dict': {
5713 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5714 'title': 'lex will - Channels',
5715 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5716 'channel': 'lex will',
5717 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5718 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5719 'tags': ['bible', 'history', 'prophesy'],
7666b936 5720 'channel_follower_count': int,
5721 'uploader_url': 'https://www.youtube.com/@lexwill718',
5722 'uploader_id': '@lexwill718',
5723 'uploader': 'lex will',
a6213a49 5724 },
5725 'playlist_mincount': 12,
5726 }, {
5727 'note': 'Search tab',
5728 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5729 'playlist_mincount': 40,
5730 'info_dict': {
5731 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5732 'title': '3Blue1Brown - Search - linear algebra',
8828f457 5733 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5734 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5735 'tags': ['Mathematics'],
5736 'channel': '3Blue1Brown',
5737 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5738 'channel_follower_count': int,
5739 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5740 'uploader_id': '@3blue1brown',
5741 'uploader': '3Blue1Brown',
8213ce28 5742 'channel_is_verified': True,
a6213a49 5743 },
5744 }, {
5745 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5746 'only_matching': True,
5747 }, {
5748 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5749 'only_matching': True,
5750 }, {
5751 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5752 'only_matching': True,
5753 }, {
5754 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5755 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5756 'info_dict': {
5757 'title': '29C3: Not my department',
5758 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
a6213a49 5759 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5760 'tags': [],
976ae3ea 5761 'view_count': int,
5762 'modified_date': '20150605',
5763 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
7666b936 5764 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
976ae3ea 5765 'channel': 'Christiaan008',
c26f9b99 5766 'availability': 'public',
7666b936 5767 'uploader_id': '@ChRiStIaAn008',
5768 'uploader': 'Christiaan008',
5769 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
a6213a49 5770 },
5771 'playlist_count': 96,
5772 }, {
5773 'note': 'Large playlist',
5774 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5775 'info_dict': {
5776 'title': 'Uploads from Cauchemar',
5777 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
7666b936 5778 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5779 'tags': [],
5780 'modified_date': r're:\d{8}',
5781 'channel': 'Cauchemar',
976ae3ea 5782 'view_count': int,
5783 'description': '',
5784 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5785 'availability': 'public',
7666b936 5786 'uploader_id': '@Cauchemar89',
5787 'uploader': 'Cauchemar',
5788 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
a6213a49 5789 },
5790 'playlist_mincount': 1123,
976ae3ea 5791 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5792 }, {
5793 'note': 'even larger playlist, 8832 videos',
5794 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5795 'only_matching': True,
5796 }, {
5797 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5798 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5799 'info_dict': {
5800 'title': 'Uploads from Interstellar Movie',
5801 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5802 'tags': [],
5803 'view_count': int,
5804 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
7666b936 5805 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5806 'channel': 'Interstellar Movie',
5807 'description': '',
5808 'modified_date': r're:\d{8}',
c26f9b99 5809 'availability': 'public',
7666b936 5810 'uploader_id': '@InterstellarMovie',
5811 'uploader': 'Interstellar Movie',
5812 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
a6213a49 5813 },
5814 'playlist_mincount': 21,
5815 }, {
5816 'note': 'Playlist with "show unavailable videos" button',
5817 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5818 'info_dict': {
5819 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5820 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5821 'view_count': int,
5822 'channel': 'Phim Siêu Nhân Nhật Bản',
5823 'tags': [],
976ae3ea 5824 'description': '',
5825 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5826 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5827 'modified_date': r're:\d{8}',
c26f9b99 5828 'availability': 'public',
7666b936 5829 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5830 'uploader_id': '@phimsieunhannhatban',
5831 'uploader': 'Phim Siêu Nhân Nhật Bản',
a6213a49 5832 },
5833 'playlist_mincount': 200,
976ae3ea 5834 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5835 }, {
5836 'note': 'Playlist with unavailable videos in page 7',
5837 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5838 'info_dict': {
5839 'title': 'Uploads from BlankTV',
5840 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5841 'channel': 'BlankTV',
7666b936 5842 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5843 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5844 'view_count': int,
5845 'tags': [],
976ae3ea 5846 'modified_date': r're:\d{8}',
5847 'description': '',
c26f9b99 5848 'availability': 'public',
7666b936 5849 'uploader_id': '@blanktv',
5850 'uploader': 'BlankTV',
5851 'uploader_url': 'https://www.youtube.com/@blanktv',
a6213a49 5852 },
5853 'playlist_mincount': 1000,
976ae3ea 5854 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5855 }, {
5856 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5857 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5858 'info_dict': {
5859 'title': 'Data Analysis with Dr Mike Pound',
5860 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
a6213a49 5861 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5862 'tags': [],
5863 'view_count': int,
5864 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
7666b936 5865 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
976ae3ea 5866 'channel': 'Computerphile',
c26f9b99 5867 'availability': 'public',
6141346d 5868 'modified_date': '20190712',
7666b936 5869 'uploader_id': '@Computerphile',
5870 'uploader': 'Computerphile',
5871 'uploader_url': 'https://www.youtube.com/@Computerphile',
a6213a49 5872 },
5873 'playlist_mincount': 11,
5874 }, {
5875 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5876 'only_matching': True,
5877 }, {
5878 'note': 'Playlist URL that does not actually serve a playlist',
5879 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5880 'info_dict': {
5881 'id': 'FqZTN594JQw',
5882 'ext': 'webm',
5883 'title': "Smiley's People 01 detective, Adventure Series, Action",
a6213a49 5884 'upload_date': '20150526',
5885 'license': 'Standard YouTube License',
5886 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5887 'categories': ['People & Blogs'],
5888 'tags': list,
5889 'view_count': int,
5890 'like_count': int,
a6213a49 5891 },
5892 'params': {
5893 'skip_download': True,
5894 },
5895 'skip': 'This video is not available.',
5896 'add_ie': [YoutubeIE.ie_key()],
5897 }, {
5898 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5899 'only_matching': True,
5900 }, {
5901 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5902 'only_matching': True,
5903 }, {
5904 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5905 'info_dict': {
14a14335 5906 'id': 'hGkQjiJLjWQ', # This will keep changing
a6213a49 5907 'ext': 'mp4',
976ae3ea 5908 'title': str,
a6213a49 5909 'upload_date': r're:\d{8}',
976ae3ea 5910 'description': str,
a6213a49 5911 'categories': ['News & Politics'],
5912 'tags': list,
5913 'like_count': int,
86973308 5914 'release_timestamp': int,
976ae3ea 5915 'channel': 'Sky News',
5916 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5917 'age_limit': 0,
5918 'view_count': int,
86973308 5919 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5920 'playable_in_embed': True,
86973308 5921 'release_date': r're:\d+',
976ae3ea 5922 'availability': 'public',
5923 'live_status': 'is_live',
5924 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5925 'channel_follower_count': int,
5926 'concurrent_view_count': int,
7666b936 5927 'uploader_url': 'https://www.youtube.com/@SkyNews',
5928 'uploader_id': '@SkyNews',
5929 'uploader': 'Sky News',
8213ce28 5930 'channel_is_verified': True,
a6213a49 5931 },
5932 'params': {
5933 'skip_download': True,
5934 },
976ae3ea 5935 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5936 }, {
5937 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5938 'info_dict': {
5939 'id': 'a48o2S1cPoo',
5940 'ext': 'mp4',
5941 'title': 'The Young Turks - Live Main Show',
a6213a49 5942 'upload_date': '20150715',
5943 'license': 'Standard YouTube License',
5944 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5945 'categories': ['News & Politics'],
5946 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5947 'like_count': int,
a6213a49 5948 },
5949 'params': {
5950 'skip_download': True,
5951 },
5952 'only_matching': True,
5953 }, {
5954 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5955 'only_matching': True,
5956 }, {
5957 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5958 'only_matching': True,
5959 }, {
5960 'note': 'A channel that is not live. Should raise error',
5961 'url': 'https://www.youtube.com/user/numberphile/live',
5962 'only_matching': True,
5963 }, {
5964 'url': 'https://www.youtube.com/feed/trending',
5965 'only_matching': True,
5966 }, {
5967 'url': 'https://www.youtube.com/feed/library',
5968 'only_matching': True,
5969 }, {
5970 'url': 'https://www.youtube.com/feed/history',
5971 'only_matching': True,
5972 }, {
5973 'url': 'https://www.youtube.com/feed/subscriptions',
5974 'only_matching': True,
5975 }, {
5976 'url': 'https://www.youtube.com/feed/watch_later',
5977 'only_matching': True,
5978 }, {
5979 'note': 'Recommended - redirects to home page.',
5980 'url': 'https://www.youtube.com/feed/recommended',
5981 'only_matching': True,
5982 }, {
5983 'note': 'inline playlist with not always working continuations',
5984 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5985 'only_matching': True,
5986 }, {
5987 'url': 'https://www.youtube.com/course',
5988 'only_matching': True,
5989 }, {
5990 'url': 'https://www.youtube.com/zsecurity',
5991 'only_matching': True,
5992 }, {
5993 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5994 'only_matching': True,
5995 }, {
5996 'url': 'https://www.youtube.com/TheYoungTurks/live',
5997 'only_matching': True,
5998 }, {
5999 'url': 'https://www.youtube.com/hashtag/cctv9',
6000 'info_dict': {
6001 'id': 'cctv9',
8828f457 6002 'title': 'cctv9 - All',
976ae3ea 6003 'tags': [],
a6213a49 6004 },
4dc23a80 6005 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 6006 }, {
6007 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6008 'only_matching': True,
6009 }, {
6010 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6011 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
add96eb9 6012 'only_matching': True,
a6213a49 6013 }, {
6014 'note': '/browse/ should redirect to /channel/',
6015 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
add96eb9 6016 'only_matching': True,
a6213a49 6017 }, {
6018 'note': 'VLPL, should redirect to playlist?list=PL...',
6019 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6020 'info_dict': {
6021 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
a6213a49 6022 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
12a1b225 6023 'title': 'NCS : All Releases 💿',
7666b936 6024 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
976ae3ea 6025 'modified_date': r're:\d{8}',
6026 'view_count': int,
6027 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6028 'tags': [],
6029 'channel': 'NoCopyrightSounds',
c26f9b99 6030 'availability': 'public',
7666b936 6031 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6032 'uploader': 'NoCopyrightSounds',
6033 'uploader_id': '@NoCopyrightSounds',
a6213a49 6034 },
6035 'playlist_mincount': 166,
7666b936 6036 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
a6213a49 6037 }, {
7666b936 6038 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
a6213a49 6039 'note': 'Topic, should redirect to playlist?list=UU...',
6040 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6041 'info_dict': {
6042 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6043 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6044 'tags': [],
6045 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6046 'channel': 'Royalty Free Music - Topic',
6047 'view_count': int,
6048 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
976ae3ea 6049 'modified_date': r're:\d{8}',
976ae3ea 6050 'description': '',
c26f9b99 6051 'availability': 'public',
7666b936 6052 'uploader': 'Royalty Free Music - Topic',
a6213a49 6053 },
a6213a49 6054 'playlist_mincount': 101,
7666b936 6055 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 6056 }, {
86973308
M
6057 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6058 # Treat as a general feed
a6213a49 6059 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6060 'info_dict': {
6061 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6062 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 6063 'tags': [],
a6213a49 6064 },
a6213a49 6065 'playlist_mincount': 9,
6066 }, {
6067 'note': 'Youtube music Album',
6068 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6069 'info_dict': {
6070 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6071 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 6072 'tags': [],
6073 'view_count': int,
6074 'description': '',
6075 'availability': 'unlisted',
6076 'modified_date': r're:\d{8}',
a6213a49 6077 },
6078 'playlist_count': 50,
7666b936 6079 'expected_warnings': ['YouTube Music is not directly supported'],
a6213a49 6080 }, {
6081 'note': 'unlisted single video playlist',
6082 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6083 'info_dict': {
a6213a49 6084 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6085 'title': 'yt-dlp unlisted playlist test',
976ae3ea 6086 'availability': 'unlisted',
6087 'tags': [],
12a1b225 6088 'modified_date': '20220418',
976ae3ea 6089 'channel': 'colethedj',
6090 'view_count': int,
6091 'description': '',
976ae3ea 6092 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6093 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
7666b936 6094 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6095 'uploader_id': '@colethedj1894',
6096 'uploader': 'colethedj',
a6213a49 6097 },
93e12ed7 6098 'playlist': [{
6099 'info_dict': {
6100 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6101 'id': 'BaW_jenozKc',
6102 '_type': 'url',
6103 'ie_key': 'Youtube',
6104 'duration': 10,
6105 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6106 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6107 'view_count': int,
6108 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6109 'channel': 'Philipp Hagemeister',
6110 'uploader_id': '@PhilippHagemeister',
6111 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6112 'uploader': 'Philipp Hagemeister',
add96eb9 6113 },
93e12ed7 6114 }],
a6213a49 6115 'playlist_count': 1,
93e12ed7 6116 'params': {'extract_flat': True},
a6213a49 6117 }, {
6118 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6119 'url': 'https://www.youtube.com/feed/recommended',
6120 'info_dict': {
6121 'id': 'recommended',
6122 'title': 'recommended',
6c73052c 6123 'tags': [],
a6213a49 6124 },
6125 'playlist_mincount': 50,
6126 'params': {
6127 'skip_download': True,
add96eb9 6128 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6129 },
6130 }, {
6131 'note': 'API Fallback: /videos tab, sorted by oldest first',
6132 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6133 'info_dict': {
6134 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6135 'title': 'Cody\'sLab - Videos',
6136 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
976ae3ea 6137 'channel': 'Cody\'sLab',
6138 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6139 'tags': [],
6140 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
add96eb9 6141 'channel_follower_count': int,
a6213a49 6142 },
6143 'playlist_mincount': 650,
6144 'params': {
6145 'skip_download': True,
add96eb9 6146 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6147 },
86973308 6148 'skip': 'Query for sorting no longer works',
a6213a49 6149 }, {
6150 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6151 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6152 'info_dict': {
6153 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6154 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6155 'modified_date': r're:\d{8}',
6156 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6157 'description': '',
6158 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6159 'tags': [],
6160 'channel': 'Royalty Free Music - Topic',
6161 'view_count': int,
c26f9b99 6162 'availability': 'public',
7666b936 6163 'uploader': 'Royalty Free Music - Topic',
a6213a49 6164 },
a6213a49 6165 'playlist_mincount': 101,
6166 'params': {
6167 'skip_download': True,
add96eb9 6168 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
a6213a49 6169 },
7666b936 6170 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
7c219ea6 6171 }, {
6172 'note': 'non-standard redirect to regional channel',
6173 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
add96eb9 6174 'only_matching': True,
61d3665d 6175 }, {
6176 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6177 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6178 'info_dict': {
6179 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6180 'modified_date': '20220407',
6181 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6182 'tags': [],
61d3665d 6183 'availability': 'unlisted',
6184 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6185 'channel': 'pukkandan',
6186 'description': 'Test for collaborative playlist',
6187 'title': 'yt-dlp test - collaborative playlist',
12a1b225 6188 'view_count': int,
7666b936 6189 'uploader_url': 'https://www.youtube.com/@pukkandan',
6190 'uploader_id': '@pukkandan',
6191 'uploader': 'pukkandan',
61d3665d 6192 },
add96eb9 6193 'playlist_mincount': 2,
c26f9b99 6194 }, {
6195 'note': 'translated tab name',
6196 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6197 'info_dict': {
6198 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6199 'tags': [],
c26f9b99 6200 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 6201 'description': 'test description',
c26f9b99 6202 'title': 'cole-dlp-test-acc - 再生リスト',
c26f9b99 6203 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6204 'channel': 'cole-dlp-test-acc',
7666b936 6205 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6206 'uploader_id': '@coletdjnz',
6207 'uploader': 'cole-dlp-test-acc',
c26f9b99 6208 },
6209 'playlist_mincount': 1,
6210 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6211 'expected_warnings': ['Preferring "ja"'],
6212 }, {
6213 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6214 'note': 'preferred lang set with playlist with translated video titles',
6215 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6216 'info_dict': {
6217 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6218 'tags': [],
6219 'view_count': int,
6220 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
c26f9b99 6221 'channel': 'cole-dlp-test-acc',
6222 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6223 'description': 'test',
c26f9b99 6224 'title': 'dlp test playlist',
6225 'availability': 'public',
7666b936 6226 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6227 'uploader_id': '@coletdjnz',
6228 'uploader': 'cole-dlp-test-acc',
c26f9b99 6229 },
6230 'playlist_mincount': 1,
6231 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6232 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 6233 }, {
6234 # shorts audio pivot for 2GtVksBMYFM.
6235 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6236 'info_dict': {
6237 'id': 'sfv_audio_pivot',
6238 'title': 'sfv_audio_pivot',
6239 'tags': [],
6240 },
6241 'playlist_mincount': 50,
6242
86973308
M
6243 }, {
6244 # Channel with a real live tab (not to be mistaken with streams tab)
6245 # Do not treat like it should redirect to live stream
6246 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6247 'info_dict': {
6248 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6249 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6250 'tags': [],
6251 },
6252 'playlist_mincount': 20,
6253 }, {
6254 # Tab name is not the same as tab id
6255 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6256 'info_dict': {
6257 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6258 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6259 'tags': [],
6260 },
6261 'playlist_mincount': 8,
6262 }, {
6263 # Home tab id is literally "home". Not to be mistaken for featured
6264 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6265 'info_dict': {
6266 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6267 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6268 'tags': [],
6269 },
6270 'playlist_mincount': 8,
6271 }, {
6272 # Should get three playlists for videos, shorts and streams tabs
6273 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6274 'info_dict': {
6275 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 6276 'title': 'Polka Ch. 尾丸ポルカ',
6277 'channel_follower_count': int,
6278 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6279 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
8828f457 6280 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
bd7e919a 6281 'channel': 'Polka Ch. 尾丸ポルカ',
6282 'tags': 'count:35',
7666b936 6283 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6284 'uploader': 'Polka Ch. 尾丸ポルカ',
6285 'uploader_id': '@OmaruPolka',
8828f457 6286 'channel_is_verified': True,
86973308
M
6287 },
6288 'playlist_count': 3,
6289 }, {
6290 # Shorts tab with channel with handle
7666b936 6291 # TODO: fix channel description
86973308
M
6292 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6293 'info_dict': {
6294 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6295 'title': 'Not Just Bikes - Shorts',
8828f457 6296 'tags': 'count:10',
86973308 6297 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
8828f457 6298 'description': 'md5:5e82545b3a041345927a92d0585df247',
86973308 6299 'channel_follower_count': int,
86973308 6300 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
86973308 6301 'channel': 'Not Just Bikes',
7666b936 6302 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6303 'uploader': 'Not Just Bikes',
6304 'uploader_id': '@NotJustBikes',
8828f457 6305 'channel_is_verified': True,
86973308
M
6306 },
6307 'playlist_mincount': 10,
6308 }, {
6309 # Streams tab
6310 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6311 'info_dict': {
6312 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6313 'title': '中村悠一 - Live',
6314 'tags': 'count:7',
6315 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6316 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
86973308 6317 'channel': '中村悠一',
86973308 6318 'channel_follower_count': int,
86973308 6319 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
7666b936 6320 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6321 'uploader_id': '@Yuichi-Nakamura',
6322 'uploader': '中村悠一',
86973308
M
6323 },
6324 'playlist_mincount': 60,
6325 }, {
6326 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6327 # See test_youtube_lists
6328 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6329 'only_matching': True,
6330 }, {
6331 # No uploads and no UCID given. Should fail with no uploads error
6332 # See test_youtube_lists
6333 'url': 'https://www.youtube.com/news',
add96eb9 6334 'only_matching': True,
86973308
M
6335 }, {
6336 # No videos tab but has a shorts tab
6337 'url': 'https://www.youtube.com/c/TKFShorts',
6338 'info_dict': {
6339 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6340 'title': 'Shorts Break - Shorts',
7666b936 6341 'tags': 'count:48',
86973308
M
6342 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6343 'channel': 'Shorts Break',
7666b936 6344 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
86973308 6345 'channel_follower_count': int,
86973308 6346 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
7666b936 6347 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6348 'uploader': 'Shorts Break',
6349 'uploader_id': '@ShortsBreak_Official',
86973308
M
6350 },
6351 'playlist_mincount': 30,
6352 }, {
6353 # Trending Now Tab. tab id is empty
6354 'url': 'https://www.youtube.com/feed/trending',
6355 'info_dict': {
6356 'id': 'trending',
6357 'title': 'trending - Now',
6358 'tags': [],
6359 },
6360 'playlist_mincount': 30,
6361 }, {
6362 # Trending Gaming Tab. tab id is empty
6363 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6364 'info_dict': {
6365 'id': 'trending',
6366 'title': 'trending - Gaming',
6367 'tags': [],
6368 },
6369 'playlist_mincount': 30,
4dc23a80
M
6370 }, {
6371 # Entries in the shorts tab should be returned as shorts URL results
7666b936 6372 # TODO: Fix channel id extraction
4dc23a80
M
6373 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6374 'info_dict': {
6375 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6376 'title': 'cole-dlp-test-acc - Shorts',
4dc23a80 6377 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6378 'description': 'test description',
6379 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6380 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6381 'tags': [],
7666b936 6382 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6383 'uploader_id': '@coletdjnz',
4dc23a80 6384 'uploader': 'cole-dlp-test-acc',
4dc23a80
M
6385 },
6386 'playlist': [{
6387 'info_dict': {
7666b936 6388 # Channel data is not currently available for short renderers (as of 2023-03-01)
4dc23a80
M
6389 '_type': 'url',
6390 'ie_key': 'Youtube',
6391 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6392 'id': 'sSM9J5YH_60',
4dc23a80 6393 'title': 'SHORT short',
4dc23a80
M
6394 'view_count': int,
6395 'thumbnails': list,
add96eb9 6396 },
4dc23a80
M
6397 }],
6398 'params': {'extract_flat': True},
6399 }, {
6400 # Live video status should be extracted
6401 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6402 'info_dict': {
6403 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
add96eb9 6404 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6405 'tags': [],
4dc23a80
M
6406 },
6407 'playlist': [{
6408 'info_dict': {
6409 '_type': 'url',
6410 'ie_key': 'Youtube',
6411 'url': 'startswith:https://www.youtube.com/watch?v=',
6412 'id': str,
6413 'title': str,
6414 'live_status': 'is_live',
6415 'channel_id': str,
6416 'channel_url': str,
6417 'concurrent_view_count': int,
6418 'channel': str,
93e12ed7 6419 'uploader': str,
6420 'uploader_url': str,
14a14335 6421 'uploader_id': str,
8213ce28 6422 'channel_is_verified': bool, # this will keep changing
add96eb9 6423 },
4dc23a80 6424 }],
c7335551 6425 'params': {'extract_flat': True, 'playlist_items': '1'},
add96eb9 6426 'playlist_mincount': 1,
c7335551
M
6427 }, {
6428 # Channel renderer metadata. Contains number of videos on the channel
6429 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6430 'info_dict': {
6431 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6432 'title': 'cole-dlp-test-acc - Channels',
c7335551
M
6433 'channel': 'cole-dlp-test-acc',
6434 'description': 'test description',
6435 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6436 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6437 'tags': [],
7666b936 6438 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6439 'uploader_id': '@coletdjnz',
c7335551 6440 'uploader': 'cole-dlp-test-acc',
c7335551
M
6441 },
6442 'playlist': [{
6443 'info_dict': {
6444 '_type': 'url',
6445 'ie_key': 'YoutubeTab',
6446 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6447 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6448 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6449 'title': 'PewDiePie',
6450 'channel': 'PewDiePie',
6451 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6452 'thumbnails': list,
6453 'channel_follower_count': int,
7666b936 6454 'playlist_count': int,
6455 'uploader': 'PewDiePie',
6456 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6457 'uploader_id': '@PewDiePie',
8213ce28 6458 'channel_is_verified': True,
add96eb9 6459 },
c7335551
M
6460 }],
6461 'params': {'extract_flat': True},
31e18355 6462 }, {
6463 'url': 'https://www.youtube.com/@3blue1brown/about',
6464 'info_dict': {
8828f457 6465 'id': '@3blue1brown',
31e18355 6466 'tags': ['Mathematics'],
8828f457 6467 'title': '3Blue1Brown',
31e18355 6468 'channel_follower_count': int,
6469 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
31e18355 6470 'channel': '3Blue1Brown',
31e18355 6471 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
8828f457 6472 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
7666b936 6473 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6474 'uploader_id': '@3blue1brown',
6475 'uploader': '3Blue1Brown',
8213ce28 6476 'channel_is_verified': True,
31e18355 6477 },
6478 'playlist_count': 0,
447afb9e 6479 }, {
6480 # Podcasts tab, with rich entry playlistRenderers
6481 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6482 'info_dict': {
6483 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6484 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6485 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6486 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6487 'title': '99 Percent Invisible - Podcasts',
6488 'uploader': '99 Percent Invisible',
6489 'channel_follower_count': int,
6490 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6491 'tags': [],
6492 'channel': '99 Percent Invisible',
6493 'uploader_id': '@99percentinvisiblepodcast',
6494 },
8828f457 6495 'playlist_count': 0,
447afb9e 6496 }, {
6497 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6498 'url': 'https://www.youtube.com/@AHimitsu/releases',
6499 'info_dict': {
6500 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6501 'channel': 'A Himitsu',
6502 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6503 'title': 'A Himitsu - Releases',
6504 'uploader_id': '@AHimitsu',
6505 'uploader': 'A Himitsu',
6506 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
8828f457 6507 'tags': 'count:12',
447afb9e 6508 'description': 'I make music',
6509 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6510 'channel_follower_count': int,
8213ce28 6511 'channel_is_verified': True,
447afb9e 6512 },
6513 'playlist_mincount': 10,
fcbc9ed7 6514 }, {
6515 # Playlist with only shorts, shown as reel renderers
6516 # FIXME: future: YouTube currently doesn't give continuation for this,
6517 # may do in future.
6518 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6519 'info_dict': {
6520 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6521 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6522 'view_count': int,
6523 'uploader_id': '@BangyShorts',
6524 'description': '',
6525 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6526 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6527 'channel': 'Bangy Shorts',
6528 'uploader': 'Bangy Shorts',
6529 'tags': [],
6530 'availability': 'public',
8828f457 6531 'modified_date': r're:\d{8}',
fcbc9ed7 6532 'title': 'Uploads from Bangy Shorts',
6533 },
6534 'playlist_mincount': 100,
6535 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
8828f457 6536 }, {
6537 'note': 'Tags containing spaces',
6538 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6539 'playlist_count': 3,
6540 'info_dict': {
6541 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6542 'channel': 'Markiplier',
6543 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6544 'title': 'Markiplier',
6545 'channel_follower_count': int,
6546 'description': 'md5:0c010910558658824402809750dc5d97',
6547 'uploader_id': '@markiplier',
6548 'uploader_url': 'https://www.youtube.com/@markiplier',
6549 'uploader': 'Markiplier',
6550 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6551 'channel_is_verified': True,
6552 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6553 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6554 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6555 'mark fischbach'],
6556 },
a6213a49 6557 }]
6558
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; every
    other URL is decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6562
# Splits a URL into three named parts:
#   pre  - whatever `_VALID_URL` matches
#   tab  - an optional "/name" path segment (no '?', '#' or '/' inside); the
#          conditional `(?(not_channel)|...)` only attempts it when the
#          `not_channel` group did not take part in the match
#   post - the remainder of the string
# NOTE(review): relies on `_VALID_URL` defining a `not_channel` group - confirm there.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6564
def _get_url_mobj(self, url):
    """Match *url* against ``_URL_RE`` and return its named groups as a dict.

    Groups that did not take part in the match are reported as ``''``
    instead of ``None``, so callers can slice or concatenate them directly.
    """
    return self._URL_RE.match(url).groupdict(default='')
6569
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of *tab* (``''`` if absent).
    ``tab_id`` is taken, in order of preference, from the tab segment of the
    renderer's endpoint URL (resolved against *base_url*), from the
    ``tabIdentifier`` field, or finally from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    tab_id = None
    if tab_url:
        # Drop the leading "/" of the matched tab segment
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        if tab_id == 'TAB_ID_SPONSORSHIPS':
            tab_id = 'membership'
        return tab_id, tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')

    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6591
def _has_tab(self, tabs, tab_id):
    """Return True if any renderer in *tabs* resolves to the id *tab_id*."""
    for tab in tabs:
        if self._extract_tab_id_and_name(tab)[0] == tab_id:
            return True
    return False
fe03a6cd 6594
def _empty_playlist(self, item_id, data):
    """Build a playlist result without entries, carrying the metadata extracted from *data*."""
    metadata = self._extract_metadata_from_tabs(item_id, data)
    return self.playlist_result([], item_id, **metadata)
6597
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist style page, or hand the URL over to another extractor.

    Depending on the URL and on what the page data contains, this returns a
    tab-based result, a playlist of several tabs, an inline (watch page)
    playlist, or a ``url_result`` pointing at a playlist or a single video.

    Raises ``ExtractorError`` for watch URLs carrying neither a video nor a
    playlist id, for a requested tab that does not exist, for music albums
    that cannot be resolved, and when nothing recognizable is found.
    Raises ``UserNotLive`` when a ``live`` tab was requested but a different
    tab was selected.
    """
    item_id = self._match_id(url)
    # Normalize the host: whatever domain was given, continue with www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # `is_channel` is true whenever the `not_channel` group is empty
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still has its leading "/"; the bare tab id drops it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab requested: default to the channel's /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither a video id nor a playlist id, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # If the playlist should not be followed, hand the single video to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-attach the originally requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs to extract alongside the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` is still the channel page data here, since the assignment above did not complete
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # Dropping `data` leaves only the extra tabs to provide entries below
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tabs: `data` may have been replaced (or cleared) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs: wrap them in one outer playlist and tell the user how to get a flat one
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, preferring the id reported by the page over the one from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6753
c5e8d7af 6754
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as YouTube / YouTube Kids / Invidious URLs
    # carrying a `list=` query parameter, and forwards them to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if YoutubeTabIE does not and it carries no ``v`` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-scope import kept as in the original; presumably it
        # lets this method work when copied out of this module - confirm before moving it.
        from ..utils import parse_qs
        video_params = parse_qs(url).get('v', [None])
        if video_params[0]:
            # A non-empty video id is present, so this is not treated as a pure playlist URL
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a ``/playlist`` URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6867
6868
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry both a video id and a `list=` parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6918
6919
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=<id> URLs; the channel id is the extractor's id
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Point YoutubeTabIE at the ``/live`` page of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6933
6934
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" pseudo-URLs; everything after the prefix is the user id
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate the keyword form into the matching ``/user/`` URL for YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6947
b05654f0 6948
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (several spellings accepted by the regex below)
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6966
6967
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Exposes the notification menu as a playlist of url-type entries, paging
    # through it with the `notification/get_notification_menu` endpoint.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        *continuation_list* is a one-element list used as an out-parameter:
        slot 0 is reset to ``None`` and then set to the last
        ``continuationItemRenderer`` seen, if any.
        """
        # Two alternative locations: the first menu page, or a continuation page
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Turn a notification renderer into a url-type entry.

        Returns ``None`` when the notification has no video id and does not
        provide both a channel id and a post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        # Provisional value; replaced below when there is no video id
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
            # TODO: handle recommended videos
        # The title is whatever follows "<channel>...: " in the short message
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)
        # The timestamp is only filled in when the `approximate_date` extractor argument is set
        timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
                     if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                     else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification menu page until no continuation is left."""
        # Shared holder that _extract_notification_menu updates with the next continuation
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data for the headers comes from the previous page's response (None on page 1)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are both 'notifications'."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
7058
7059
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Keyword search ("ytsearch<N>:<query>"). This class only supplies constants;
    # the search logic itself is inherited from its base classes.
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
b05654f0 7081
a61fd4cf 7082
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same as the plain keyword search, but with a different key and filter token
    IE_DESC = 'YouTube search, newest videos first'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 7096
c9ae7b95 7097
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Browser-style result URLs (/results or /search with `search_query=` or `q=`)
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL and return it as a playlist.

        The query comes from ``search_query`` (or ``q``), and the optional
        ``sp`` parameter is handed to ``_search_results`` as-is. The query
        text is used as both the playlist id and its title.
        """
        url_params = parse_qs(url)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        search_params = url_params.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 7167
7168
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # music.youtube.com search URLs; a result section may be chosen either with
    # the `sp` parameter or with a "#section" fragment
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` token sent with the search
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run a YouTube Music search and return it as a playlist.

        With an ``sp`` parameter, the section label is the matching
        ``_SECTIONS`` name, or the raw token when none matches. Without it, the
        URL fragment is looked up in ``_SECTIONS``; an unknown fragment means
        no section and no params. The playlist id and title are the query,
        followed by " - <section>" when a section applies.
        """
        url_params = parse_qs(url)
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        params = url_params.get('sp', (None,))[0]

        if params:
            # Reverse lookup of the token; fall back to showing the token itself
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
16aa9ea4 7220
7221
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # Derived from the feed name, so each subclass gets its own extractor name
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # Borrow the check from YoutubeBaseInfoExtractor, which is not a base of this class
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
7240
7241
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" is forwarded to the "WL" playlist URL,
    # with YoutubeTabIE named as the extractor for the result.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 7254
7255
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page as well as the
    # ":ytrec" / ":ytrecommended" keywords.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 7271
1ed5b5c9 7272
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the subscriptions feed; the pattern accepts
    # ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 7284
1ed5b5c9 7285
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the watch-history feed; the pattern accepts
    # both ":ythis" and ":ythistory".
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
7294
7295
class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Maps /source/<video id>/shorts URLs onto the sfv_audio_pivot feed URL,
    # with YoutubeTabIE named as the extractor for the result.
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id.

        The encoded id is substituted three times into a fixed byte template;
        the result is base64-encoded and then percent-quoted for use in a URL.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7318
7319
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single,
    # non-identifying query parameter (or none at all) and so carry no video
    # id. Extraction always fails with a hint about quoting the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp) so that the
        # suggestion can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
7367
7368
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its underlying watch URL and attaches the
    # clip's start/end offsets (in seconds) as section_start/section_end.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip's base video.

        Raises ExtractorError when either the base video id or the clip's
        loop command (which holds the start/end times) cannot be found in
        the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # get_all=False: take the first loopCommand found along this path
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard the subscripting below fails with an opaque
            # TypeError on None instead of a readable extraction error.
            raise ExtractorError('Unable to find clip section data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; expose them in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 7428
7429
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Follows consent.youtube.com interstitial URLs to the target given in
    # their "continue" query parameter.
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        # When "continue" appears more than once, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7475
7476
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose v= value is only 1-10 characters long and ends
    # the URL; extraction always fails with an explanatory error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)