]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[ie, cleanup] No `from` stdlib imports in extractors (#8978)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
fe93e2c4 5import datetime
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8828f457 14import shlex
46383212 15import sys
f8271158 16import threading
8a784c74 17import time
e0df6211 18import traceback
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
3d2623a8 25from ..networking.exceptions import HTTPError, network_exceptions
4bb4a188 26from ..utils import (
f8271158 27 NO_DEFAULT,
28 ExtractorError,
4d37720a 29 LazyList,
693f0600 30 UserNotLive,
720c3099 31 bug_reports_message,
82d02080 32 classproperty,
c5e8d7af 33 clean_html,
d92f5d5a 34 datetime_from_str,
11f9be09 35 dict_get,
a25a4243 36 filesize_from_tbr,
7a32c70d 37 filter_dict,
2d30521a 38 float_or_none,
11f9be09 39 format_field,
ff91cf74 40 get_first,
dd27fd17 41 int_or_none,
641ad5d8 42 is_html,
34921b43 43 join_nonempty,
48416bc4 44 js_to_json,
94278f72 45 mimetype2ext,
11f9be09 46 orderedSet,
6310acf5 47 parse_codecs,
49bd8c66 48 parse_count,
7c80519c 49 parse_duration,
7ea65411 50 parse_iso8601,
4dfbf869 51 parse_qs,
dca3ff4a 52 qualities,
3995d37d 53 remove_start,
cf7e015f 54 smuggle_url,
dbdaaa23 55 str_or_none,
c93d53f5 56 str_to_int,
f3aa3c3f 57 strftime_or_none,
7c365c21 58 traverse_obj,
a25a4243 59 try_call,
556dbe7f 60 try_get,
c5e8d7af
PH
61 unescapeHTML,
62 unified_strdate,
f0d785d3 63 unified_timestamp,
cf7e015f 64 unsmuggle_url,
8bdd16b4 65 update_url_query,
21c340b8 66 url_or_none,
fe93e2c4 67 urljoin,
7c365c21 68 variadic,
c5e8d7af
PH
69)
70
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# any clients starting with _ cannot be explicitly requested by the user
#
# Static per-client innertube configuration, keyed by client name.
# Entries without an INNERTUBE_API_KEY / INNERTUBE_HOST / REQUIRE_JS_PLAYER
# receive defaults when build_innertube_clients() runs at import time.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '6.42.52',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '6.33.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}
244
245
e7870111
D
246def _split_innertube_client(client_name):
247 variant, *base = client_name.rsplit('.', 1)
248 if base:
249 return variant, base[0], variant
250 base, *variant = client_name.split('_', 1)
251 return client_name, base, variant[0] if variant else None
252
253
c795c39f
L
def short_client_name(client_name):
    """Return a short upper-case label for an innertube client name.

    The first element returned by `_split_innertube_client` is used, with
    'embedscreen' abbreviated to 'e_s'. The label is built from at most four
    characters of the first '_'-separated part plus the first character of
    every following part, combined with `join_nonempty` and upper-cased.
    """
    main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
    # x[:1] rather than x[0]: an empty part (e.g. from a doubled underscore) contributes nothing
    return join_nonempty(main[:4], ''.join(x[:1] for x in parts)).upper()
257
258
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS (in place).

    For each client this sets default values for INNERTUBE_API_KEY,
    INNERTUBE_HOST, REQUIRE_JS_PLAYER and the context 'hl', then assigns a
    'priority' derived from the client's base name. Clients without a variant
    additionally get a derived ``<client>_embedscreen`` entry.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot: new '*_embedscreen' entries are added to the dict inside the loop
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3


build_innertube_clients()
288
289
class BadgeType(enum.Enum):
    """Kinds of badge recognised when parsing badge renderers."""

    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
c26f9b99 298
299
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of path names that have a fixed meaning on the site
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|source|'
        r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
315
d9190e44
RH
316 _INVIDIOUS_SITES = (
317 # invidious-redirect websites
318 r'(?:www\.)?redirect\.invidious\.io',
319 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 320 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
321 r'(?:www\.)?invidious\.pussthecat\.org',
322 r'(?:www\.)?invidious\.zee\.li',
323 r'(?:www\.)?invidious\.ethibox\.fr',
05799a48
RH
324 r'(?:www\.)?iv\.ggtyler\.dev',
325 r'(?:www\.)?inv\.vern\.i2p',
326 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
327 r'(?:www\.)?inv\.riverside\.rocks',
328 r'(?:www\.)?invidious\.silur\.me',
329 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
330 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
331 r'(?:www\.)?invidious\.slipfox\.xyz',
332 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
333 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
334 r'(?:www\.)?invidious\.tiekoetter\.com',
335 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
336 r'(?:www\.)?invidious\.nerdvpn\.de',
337 r'(?:www\.)?invidious\.weblibre\.org',
338 r'(?:www\.)?inv\.odyssey346\.dev',
339 r'(?:www\.)?invidious\.dhusch\.de',
340 r'(?:www\.)?iv\.melmac\.space',
341 r'(?:www\.)?watch\.thekitty\.zone',
342 r'(?:www\.)?invidious\.privacydev\.net',
343 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
344 r'(?:www\.)?invidious\.drivet\.xyz',
345 r'(?:www\.)?vid\.priv\.au',
346 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
347 r'(?:www\.)?inv\.vern\.cc',
348 r'(?:www\.)?invidious\.esmailelbob\.xyz',
349 r'(?:www\.)?invidious\.sethforprivacy\.com',
350 r'(?:www\.)?yt\.oelrichsgarcia\.de',
351 r'(?:www\.)?yt\.artemislena\.eu',
352 r'(?:www\.)?invidious\.flokinet\.to',
353 r'(?:www\.)?invidious\.baczek\.me',
354 r'(?:www\.)?y\.com\.sb',
355 r'(?:www\.)?invidious\.epicsite\.xyz',
356 r'(?:www\.)?invidious\.lidarshield\.cloud',
357 r'(?:www\.)?yt\.funami\.tech',
d9190e44 358 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
359 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
360 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
361 # youtube-dl invidious instances list
362 r'(?:(?:www|no)\.)?invidiou\.sh',
363 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
364 r'(?:www\.)?invidious\.kabi\.tk',
365 r'(?:www\.)?invidious\.mastodon\.host',
366 r'(?:www\.)?invidious\.zapashcanon\.fr',
367 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
368 r'(?:www\.)?invidious\.tinfoil-hat\.net',
369 r'(?:www\.)?invidious\.himiko\.cloud',
370 r'(?:www\.)?invidious\.reallyancient\.tech',
371 r'(?:www\.)?invidious\.tube',
372 r'(?:www\.)?invidiou\.site',
373 r'(?:www\.)?invidious\.site',
374 r'(?:www\.)?invidious\.xyz',
375 r'(?:www\.)?invidious\.nixnet\.xyz',
376 r'(?:www\.)?invidious\.048596\.xyz',
377 r'(?:www\.)?invidious\.drycat\.fr',
378 r'(?:www\.)?inv\.skyn3t\.in',
379 r'(?:www\.)?tube\.poal\.co',
380 r'(?:www\.)?tube\.connect\.cafe',
381 r'(?:www\.)?vid\.wxzm\.sx',
382 r'(?:www\.)?vid\.mint\.lgbt',
383 r'(?:www\.)?vid\.puffyan\.us',
384 r'(?:www\.)?yewtu\.be',
385 r'(?:www\.)?yt\.elukerio\.org',
386 r'(?:www\.)?yt\.lelux\.fi',
387 r'(?:www\.)?invidious\.ggc-project\.de',
388 r'(?:www\.)?yt\.maisputain\.ovh',
389 r'(?:www\.)?ytprivate\.com',
390 r'(?:www\.)?invidious\.13ad\.de',
391 r'(?:www\.)?invidious\.toot\.koeln',
392 r'(?:www\.)?invidious\.fdn\.fr',
393 r'(?:www\.)?watch\.nettohikari\.com',
394 r'(?:www\.)?invidious\.namazso\.eu',
395 r'(?:www\.)?invidious\.silkky\.cloud',
396 r'(?:www\.)?invidious\.exonip\.de',
397 r'(?:www\.)?invidious\.riverside\.rocks',
398 r'(?:www\.)?invidious\.blamefran\.net',
399 r'(?:www\.)?invidious\.moomoo\.de',
400 r'(?:www\.)?ytb\.trom\.tf',
401 r'(?:www\.)?yt\.cyberhost\.uk',
402 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
403 r'(?:www\.)?qklhadlycap4cnod\.onion',
404 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
405 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
406 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
407 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
408 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
409 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
410 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
411 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
412 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
413 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
414 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
415 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 416 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 417 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 418 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
419 r'(?:www\.)?watch\.whatever\.social',
420 r'(?:www\.)?piped\.garudalinux\.org',
421 r'(?:www\.)?piped\.rivo\.lol',
422 r'(?:www\.)?piped-libre\.kavin\.rocks',
423 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 424 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
425 r'(?:www\.)?il\.ax',
426 r'(?:www\.)?piped\.esmailelbob\.xyz',
427 r'(?:www\.)?piped\.projectsegfau\.lt',
428 r'(?:www\.)?piped\.privacydev\.net',
429 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
430 r'(?:www\.)?piped\.smnz\.de',
431 r'(?:www\.)?piped\.adminforge\.de',
432 r'(?:www\.)?watch\.whatevertinfoil\.de',
433 r'(?:www\.)?piped\.qdi\.fi',
6a9c7a2b 434 r'(?:(?:www|cf)\.)?piped\.video',
bc87dac7 435 r'(?:www\.)?piped\.aeong\.one',
05799a48
RH
436 r'(?:www\.)?piped\.moomoo\.me',
437 r'(?:www\.)?piped\.chauvet\.pro',
438 r'(?:www\.)?watch\.leptons\.xyz',
439 r'(?:www\.)?pd\.vern\.cc',
440 r'(?:www\.)?piped\.hostux\.net',
441 r'(?:www\.)?piped\.lunar\.icu',
78a78fa7
BG
442 # Hyperpipe instances from https://hyperpipe.codeberg.page/
443 r'(?:www\.)?hyperpipe\.surge\.sh',
444 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
445 r'(?:www\.)?listen\.whatever\.social',
446 r'(?:www\.)?music\.adminforge\.de',
d9190e44
RH
447 )
448
c26f9b99 449 # extracted from account/account_menu ep
450 # XXX: These are the supported YouTube UI and API languages,
451 # which is slightly different from languages supported for translation in YouTube studio
452 _SUPPORTED_LANG_CODES = [
453 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
454 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
455 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
456 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
457 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
458 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
459 ]
460
a057779d 461 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
462
7666b936 463 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
464 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
465
466 def ucid_or_none(self, ucid):
467 return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
468
469 def handle_or_none(self, handle):
470 return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
471
472 def handle_from_url(self, url):
473 return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
474 url, 'channel handle', default=None)
475
476 def ucid_from_url(self, url):
477 return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
478 url, 'channel id', default=None)
479
c26f9b99 480 @functools.cached_property
481 def _preferred_lang(self):
482 """
483 Returns a language code supported by YouTube for the user preferred language.
484 Returns None if no preferred language set.
485 """
486 preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
487 if not preferred_lang:
488 return
489 if preferred_lang not in self._SUPPORTED_LANG_CODES:
490 raise ExtractorError(
491 f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
492 expected=True)
493 elif preferred_lang != 'en':
494 self.report_warning(
495 f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
496 return preferred_lang
497
cce889b9 498 def _initialize_consent(self):
499 cookies = self._get_cookies('https://www.youtube.com/')
500 if cookies.get('__Secure-3PSID'):
501 return
378ae9f9 502 socs = cookies.get('SOCS')
503 if socs and not socs.value.startswith('CAA'): # not consented
504 return
505 self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
8d81f3e3 506
f3aa3c3f 507 def _initialize_pref(self):
508 cookies = self._get_cookies('https://www.youtube.com/')
509 pref_cookie = cookies.get('PREF')
510 pref = {}
511 if pref_cookie:
512 try:
14f25df2 513 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 514 except ValueError:
515 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
c26f9b99 516 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
14f25df2 517 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 518
b2e8bc1b 519 def _real_initialize(self):
f3aa3c3f 520 self._initialize_pref()
cce889b9 521 self._initialize_consent()
a25bca9f 522 self._check_login_required()
523
524 def _check_login_required(self):
24146491 525 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 526 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 527
b7c47b74 528 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
529 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 530
000c15a4 531 def _get_default_ytcfg(self, client='web'):
532 return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 533
000c15a4 534 def _get_innertube_host(self, client='web'):
535 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 536
000c15a4 537 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 538 # try_get but with fallback to default ytcfg client values when present
539 _func = lambda y: try_get(y, getter, expected_type)
540 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
541
000c15a4 542 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 543 return self._ytcfg_get_safe(
544 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 545 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 546
000c15a4 547 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 548 return self._ytcfg_get_safe(
549 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 550 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 551
2ae778b8 552 def _select_api_hostname(self, req_api_hostname, default_client=None):
553 return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
554 or req_api_hostname or self._get_innertube_host(default_client or 'web'))
555
000c15a4 556 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 557 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 558
000c15a4 559 def _extract_context(self, ytcfg=None, default_client='web'):
f3aa3c3f 560 context = get_first(
561 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
396a76f7 562 # Enforce language and tz for extraction
563 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
c26f9b99 564 client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
109dd3b2 565 return context
566
cf87314d 567 _SAPISID = None
568
109dd3b2 569 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 570 time_now = round(time.time())
cf87314d 571 if self._SAPISID is None:
572 yt_cookies = self._get_cookies('https://www.youtube.com')
573 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
574 # See: https://github.com/yt-dlp/yt-dlp/issues/393
575 sapisid_cookie = dict_get(
576 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
577 if sapisid_cookie and sapisid_cookie.value:
578 self._SAPISID = sapisid_cookie.value
579 self.write_debug('Extracted SAPISID cookie')
580 # SAPISID cookie is required if not already present
581 if not yt_cookies.get('SAPISID'):
582 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
583 self._set_cookie(
584 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
585 else:
586 self._SAPISID = False
587 if not self._SAPISID:
588 return None
1974e99f 589 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
590 sapisidhash = hashlib.sha1(
86e5f3ed 591 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 592 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
593
594 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 595 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 596 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 597
109dd3b2 598 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 599 data.update(query)
11f9be09 600 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 601 real_headers.update({'content-type': 'application/json'})
602 if headers:
603 real_headers.update(headers)
2ae778b8 604 api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
605 or api_key or self._extract_api_key(default_client=default_client))
545cc85d 606 return self._download_json(
2ae778b8 607 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
a5c56234 608 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 609 data=json.dumps(data).encode('utf8'), headers=real_headers,
2ae778b8 610 query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 611
65141660 612 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
613 return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 614
99e9e001 615 @staticmethod
616 def _extract_session_index(*data):
617 """
618 Index of current account in account list.
619 See: https://github.com/yt-dlp/yt-dlp/pull/519
620 """
621 for ytcfg in data:
622 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
623 if session_index is not None:
624 return session_index
625
626 # Deprecated?
627 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 628 if ytcfg:
14f25df2 629 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
630 if token:
631 return token
99e9e001 632 if webpage:
633 return self._search_regex(
634 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
635 'identity token', default=None, fatal=False)
a1c5d2ca
M
636
637 @staticmethod
fe93e2c4 638 def _extract_account_syncid(*args):
8ea3f7b9 639 """
640 Extract syncId required to download private playlists of secondary channels
fe93e2c4 641 @params response and/or ytcfg
8ea3f7b9 642 """
fe93e2c4 643 for data in args:
644 # ytcfg includes channel_syncid if on secondary channel
14f25df2 645 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
fe93e2c4 646 if delegated_sid:
647 return delegated_sid
648 sync_ids = (try_get(
649 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
14f25df2 650 lambda x: x['DATASYNC_ID']), str) or '').split('||')
fe93e2c4 651 if len(sync_ids) >= 2 and sync_ids[1]:
652 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
653 # and just "user_syncid||" for primary channel. We only want the channel_syncid
654 return sync_ids[0]
a1c5d2ca 655
ac56cf38 656 @staticmethod
657 def _extract_visitor_data(*args):
658 """
659 Extracts visitorData from an API response or ytcfg
660 Appears to be used to track session state
661 """
9222c381 662 return get_first(
6c73052c 663 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
9222c381 664 expected_type=str)
ac56cf38 665
2762dbb1 666 @functools.cached_property
99e9e001 667 def is_authenticated(self):
668 return bool(self._generate_sapisidhash_header())
669
11f9be09 670 def extract_ytcfg(self, video_id, webpage):
8c54a305 671 if not webpage:
672 return {}
29f7c58a 673 return self._parse_json(
674 self._search_regex(
675 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 676 default='{}'), video_id, fatal=False) or {}
677
11f9be09 678 def generate_api_headers(
99e9e001 679 self, *, ytcfg=None, account_syncid=None, session_index=None,
680 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
681
2ae778b8 682 origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
f4f751af 683 headers = {
14f25df2 684 'X-YouTube-Client-Name': str(
11f9be09 685 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
686 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
99e9e001 687 'Origin': origin,
688 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
689 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
50ac0e54 690 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
691 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
99e9e001 692 }
693 if session_index is None:
314ee305 694 session_index = self._extract_session_index(ytcfg)
695 if account_syncid or session_index is not None:
696 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
99e9e001 697
109dd3b2 698 auth = self._generate_sapisidhash_header(origin)
f4f751af 699 if auth is not None:
700 headers['Authorization'] = auth
109dd3b2 701 headers['X-Origin'] = origin
7a32c70d 702 return filter_dict(headers)
29f7c58a 703
a25bca9f 704 def _download_ytcfg(self, client, video_id):
705 url = {
706 'web': 'https://www.youtube.com',
707 'web_music': 'https://music.youtube.com',
708 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
709 }.get(client)
710 if not url:
711 return {}
712 webpage = self._download_webpage(
713 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
714 return self.extract_ytcfg(video_id, webpage) or {}
715
2d6659b9 716 @staticmethod
717 def _build_api_continuation_query(continuation, ctp=None):
718 query = {
719 'continuation': continuation
720 }
721 # TODO: Inconsistency with clickTrackingParams.
722 # Currently we have a fixed ctp contained within context (from ytcfg)
723 # and a ctp in root query for continuation.
724 if ctp:
725 query['clickTracking'] = {'clickTrackingParams': ctp}
726 return query
727
2d6659b9 728 @classmethod
729 def _extract_next_continuation_data(cls, renderer):
730 next_continuation = try_get(
731 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
732 lambda x: x['continuation']['reloadContinuationData']), dict)
733 if not next_continuation:
734 return
735 continuation = next_continuation.get('continuation')
736 if not continuation:
737 return
738 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 739 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 740
741 @classmethod
742 def _extract_continuation_ep_data(cls, continuation_ep: dict):
743 if isinstance(continuation_ep, dict):
744 continuation = try_get(
14f25df2 745 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 746 if not continuation:
747 return
748 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 749 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 750
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find a continuation query for `renderer`.

    The renderer's own next/reload continuation data is preferred; otherwise the
    first 'continuationItemRenderer' under 'contents', 'items' or 'rows' that
    yields a query through _extract_continuation_ep_data() is used.
    """
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, (
            ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
            ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
        ), get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 761
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for every alert found in data['alerts'].

    Entries of the 'alerts' list that are not dicts, and alert values that are
    not dicts, are skipped, as are alerts without a 'type' or without text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same defensive check as for the container above: a non-dict
            # value would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
774
c0ac49bc 775 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 776 errors, warnings = [], []
109dd3b2 777 for alert_type, alert_message in alerts:
641ad5d8 778 if alert_type.lower() == 'error' and fatal:
109dd3b2 779 errors.append([alert_type, alert_message])
a057779d 780 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 781 warnings.append([alert_type, alert_message])
782
783 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 784 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 785 if errors:
786 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
787
788 def _extract_and_report_alerts(self, data, *args, **kwargs):
789 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
790
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.

    Each renderer is classified by its icon type, then by its style and,
    as a last resort, by a substring match on its label text.
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    extracted = []
    # Only values stored under a key ending in "badgeRenderer"/"BadgeRenderer" are considered
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        icon_type = traverse_obj(renderer, ('icon', 'iconType'), expected_type=str)
        known_type = icon_type_map.get(icon_type) or badge_style_map.get(traverse_obj(renderer, 'style'))
        if known_type:
            extracted.append({'type': known_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        # The first label_map entry contained in the label wins
        label_type = next(
            (candidate for needle, candidate in label_map.items() if needle in label), None)
        if label_type is not None:
            extracted.append({'type': label_type})

    return extracted
842
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has its 'type' equal to `badge_type`."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)
846
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at any of the given paths.

    Text is taken from an object's 'simpleText' string, or else from the
    concatenated 'text' of its 'runs' (a bare list is treated as the runs).
    @param path_list: paths to try in order; with no paths, `data` itself is used
    @param max_runs: if truthy, upper bound on how many runs are joined
    @returns the text, or None when nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or alternative keys is wrapped so the loop below sees one item
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not runs:
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
47193e02 868
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found in `data` at the given paths.

    parse_count() is tried first; if it yields None, the leading run of
    digits and commas (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)
876
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid URL are dropped
    """
    found = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            image_url = url_or_none(entry.get('url'))
            if not image_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in image_url:
                # Keep only the part before the query string
                image_url = image_url.partition('?')[0]
            found.append({
                'url': image_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return found
900
f3aa3c3f 901 @staticmethod
902 def extract_relative_time(relative_time_text):
903 """
904 Extracts a relative time from string and converts to dt object
2fb35f60 905 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
f3aa3c3f 906 """
2fb35f60 907
5ca095cb 908 # XXX: this could be moved to a general function in utils/_utils.py
2fb35f60 909 # The relative time text strings are roughly the same as what
910 # Javascript's Intl.RelativeTimeFormat function generates.
911 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
912 mobj = re.search(
913 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
914 relative_time_text)
f3aa3c3f 915 if mobj:
f0d785d3 916 start = mobj.group('start')
917 if start:
918 return datetime_from_str(start)
f3aa3c3f 919 try:
f0d785d3 920 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 921 except ValueError:
922 return None
923
c26f9b99 924 def _parse_time_text(self, text):
925 if not text:
926 return
f3aa3c3f 927 dt = self.extract_relative_time(text)
928 timestamp = None
929 if isinstance(dt, datetime.datetime):
930 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 931
932 if timestamp is None:
933 timestamp = (
934 unified_timestamp(text) or unified_timestamp(
935 self._search_regex(
17322130 936 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 937 text.lower(), 'time text', default=None)))
f0d785d3 938
c26f9b99 939 if text and timestamp is None and self._preferred_lang in (None, 'en'):
940 self.report_warning(
941 f'Cannot parse localized time text "{text}"', only_once=True)
942 return timestamp
f3aa3c3f 943
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on transient failures.

    Two retry managers run side by side: one for "incomplete data" (fatal only
    if the 'raise_incomplete_data' extractor argument is set) and one for all
    other retryable errors.
    @param check_get_keys: key path(s) that must resolve to a truthy value in the
                           response, otherwise the data is treated as incomplete
    @returns the API response; None when an incomplete-data retry is not granted;
             otherwise whatever _error_or_warning() returns for a non-retryable error
    """
    incomplete_is_fatal = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    incomplete_attempts = iter(self.RetryManager(fatal=incomplete_is_fatal))
    incomplete_rm = next(incomplete_attempts)
    general_attempts = iter(self.RetryManager())
    general_rm = next(general_attempts)

    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, HTTPError):
                general_rm.error = err
                next(general_attempts)
                continue

            # Peek at the body: a non-HTML error body may carry a JSON error message
            head = cause.response.read(512)
            if not is_html(head):
                api_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(cause.response, None, item_id, prefix=head) or '{}',
                        item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if api_error:
                    self._report_alerts([('ERROR', api_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.status in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            general_rm.error = err
            next(general_attempts)
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            general_rm.error = err
            next(general_attempts)
            continue

        if traverse_obj(response, *variadic(check_get_keys)):
            return response

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        incomplete_rm.error = ExtractorError('Incomplete data received', expected=True)
        if not next(incomplete_attempts, None):
            return None
109dd3b2 1010
9297939e 1011 @staticmethod
1012 def is_music_url(url):
5b28cef7 1013 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
9297939e 1014
def _extract_video(self, renderer):
    """
    Build a 'url'-type result for YoutubeIE from a video renderer.

    Metadata is read from the renderer itself and, where the renderer lacks it,
    from the reel player header nested under its navigation endpoint.
    @returns an info dict with '_type': 'url' pointing at the watch or shorts URL
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False) or traverse_obj(
            reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # For live and upcoming videos the count is stored as the concurrent view count
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    ie_key = YoutubeIE.ie_key()
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The approximate timestamp is only parsed when the 'approximate_date' extractor argument is set
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1109
0c148415 1110
360e1ca5 1111class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1112 IE_DESC = 'YouTube'
cb7dfeea 1113 _VALID_URL = r"""(?x)^
c5e8d7af 1114 (
edb53e2d 1115 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1116 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1117 (?:www\.)?deturl\.com/www\.youtube\.com|
1118 (?:www\.)?pwnyoutube\.com|
1119 (?:www\.)?hooktube\.com|
1120 (?:www\.)?yourepeat\.com|
1121 tube\.majestyc\.net|
1122 %(invidious)s|
1123 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1124 (?:.*?\#/)? # handle anchor (#/) redirect urls
1125 (?: # the various things that can precede the ID:
dad2210c 1126 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1127 |(?: # or the v= param in all its forms
f7000f3a 1128 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1129 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1130 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1131 v=
1132 )
f4b05232 1133 ))
cbaed4bb
S
1134 |(?:
1135 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1136 vid\.plus| # or vid.plus/xxxx
1137 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1138 %(invidious)s
cbaed4bb 1139 )/
edb53e2d 1140 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1141 )
c5e8d7af 1142 )? # all until now is optional -> you can pass the naked ID
201c1459 1143 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1144 (?(1).+)? # if we found the ID, everything can follow
9297939e 1145 (?:\#|$)""" % {
d9190e44 1146 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1147 }
7c6eb424 1148 _EMBED_REGEX = [
1149 r'''(?x)
1150 (?:
0ca0f881 1151 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1152 data-video-url=|
1153 <embed[^>]+?src=|
1154 embedSWF\(?:\s*|
1155 <object[^>]+data=|
1156 new\s+SWFObject\(
1157 )
1158 (["\'])
1159 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1160 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1161 \1''',
1162 # https://wordpress.org/plugins/lazy-load-for-videos/
1163 r'''(?xs)
1164 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1165 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1166 ]
6368e2e6 1167 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1168
e40c758c 1169 _PLAYER_INFO_RE = (
cc2db878 1170 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1171 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1172 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1173 )
2c62dc26 1174 _formats = {
c2d3cb4c 1175 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1176 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1177 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1178 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1179 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1180 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1181 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1182 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1183 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1184 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1185 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1186 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1187 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1188 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1189 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1190 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1191 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1192 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1193
1194
1195 # 3D videos
c2d3cb4c 1196 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1197 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1198 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1199 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1200 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1201 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1202 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1203
96fb5605 1204 # Apple HTTP Live Streaming
11f12195 1205 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1206 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1207 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1208 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1209 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1210 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1211 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1212 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1213
1214 # DASH mp4 video
d23028a8
S
1215 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1216 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1217 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1218 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1219 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1220 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1221 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1222 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1223 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1224 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1225 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1226 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1227
f6f1fc92 1228 # Dash mp4 audio
d23028a8
S
1229 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1230 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1231 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1232 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1233 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1234 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1235 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1236
1237 # Dash webm
d23028a8
S
1238 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1239 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1240 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1241 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1242 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1243 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1244 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1245 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1246 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1247 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1248 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1249 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1250 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1251 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1252 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1253 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1254 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1255 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1256 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1257 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1258 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1260
1261 # Dash webm audio
d23028a8
S
1262 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1263 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1264
0857baad 1265 # Dash webm audio with opus inside
d23028a8
S
1266 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1267 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1268 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1269
ce6b9a2d
PH
1270 # RTMP (unnamed)
1271 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1272
1273 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1274 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1275 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1276 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1277 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1278 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1279 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1280 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1281 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1282 }
29f7c58a 1283 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1284
fd5c4aab
S
1285 _GEO_BYPASS = False
1286
78caa52a 1287 IE_NAME = 'youtube'
2eb88d95
PH
1288 _TESTS = [
1289 {
2d3d2997 1290 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1291 'info_dict': {
1292 'id': 'BaW_jenozKc',
1293 'ext': 'mp4',
3867038a 1294 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
ff9f925b 1295 'channel': 'Philipp Hagemeister',
dd4c4492
S
1296 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1297 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1298 'upload_date': '20121002',
ff9f925b 1299 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1300 'categories': ['Science & Technology'],
3867038a 1301 'tags': ['youtube-dl'],
556dbe7f 1302 'duration': 10,
dbdaaa23 1303 'view_count': int,
3e7c1224 1304 'like_count': int,
ff9f925b 1305 'availability': 'public',
1306 'playable_in_embed': True,
1307 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1308 'live_status': 'not_live',
1309 'age_limit': 0,
7c80519c 1310 'start_time': 1,
297a564b 1311 'end_time': 9,
12a1b225 1312 'comment_count': int,
7666b936 1313 'channel_follower_count': int,
1314 'uploader': 'Philipp Hagemeister',
1315 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1316 'uploader_id': '@PhilippHagemeister',
5caf30db 1317 'heatmap': 'count:100',
2eb88d95 1318 }
0e853ca4 1319 },
fccd3771 1320 {
4bc3a23e
PH
1321 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1322 'note': 'Embed-only video (#1746)',
1323 'info_dict': {
1324 'id': 'yZIXLfi8CZQ',
1325 'ext': 'mp4',
1326 'upload_date': '20120608',
1327 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1328 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
94bfcd23 1329 'age_limit': 18,
545cc85d 1330 },
1331 'skip': 'Private video',
fccd3771 1332 },
11b56058 1333 {
8bdd16b4 1334 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1335 'note': 'Use the first video ID in the URL',
1336 'info_dict': {
1337 'id': 'BaW_jenozKc',
1338 'ext': 'mp4',
3867038a 1339 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976ae3ea 1340 'channel': 'Philipp Hagemeister',
1341 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1342 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1343 'upload_date': '20121002',
976ae3ea 1344 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1345 'categories': ['Science & Technology'],
3867038a 1346 'tags': ['youtube-dl'],
556dbe7f 1347 'duration': 10,
dbdaaa23 1348 'view_count': int,
11b56058 1349 'like_count': int,
976ae3ea 1350 'availability': 'public',
1351 'playable_in_embed': True,
1352 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1353 'live_status': 'not_live',
1354 'age_limit': 0,
12a1b225 1355 'comment_count': int,
7666b936 1356 'channel_follower_count': int,
1357 'uploader': 'Philipp Hagemeister',
1358 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1359 'uploader_id': '@PhilippHagemeister',
14a14335 1360 'heatmap': 'count:100',
34a7de29
S
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
11b56058 1365 },
dd27fd17 1366 {
2d3d2997 1367 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1368 'note': '256k DASH audio (format 141) via DASH manifest',
1369 'info_dict': {
1370 'id': 'a9LDPn-MO4I',
1371 'ext': 'm4a',
1372 'upload_date': '20121002',
4bc3a23e 1373 'description': '',
4bc3a23e 1374 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1375 },
4bc3a23e
PH
1376 'params': {
1377 'youtube_include_dash_manifest': True,
1378 'format': '141',
4919603f 1379 },
de3c7fe0 1380 'skip': 'format 141 not served anymore',
dd27fd17 1381 },
8bdd16b4 1382 # DASH manifest with encrypted signature
1383 {
1384 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1385 'info_dict': {
1386 'id': 'IB3lcPjvWLA',
1387 'ext': 'm4a',
1388 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1389 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1390 'duration': 244,
8bdd16b4 1391 'upload_date': '20131011',
cc2db878 1392 'abr': 129.495,
976ae3ea 1393 'like_count': int,
1394 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1395 'playable_in_embed': True,
1396 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1397 'view_count': int,
1398 'track': 'The Spark',
1399 'live_status': 'not_live',
1400 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1401 'channel': 'Afrojack',
976ae3ea 1402 'tags': 'count:19',
1403 'availability': 'public',
1404 'categories': ['Music'],
1405 'age_limit': 0,
1406 'alt_title': 'The Spark',
7666b936 1407 'channel_follower_count': int,
1408 'uploader': 'Afrojack',
1409 'uploader_url': 'https://www.youtube.com/@Afrojack',
1410 'uploader_id': '@Afrojack',
8bdd16b4 1411 },
1412 'params': {
1413 'youtube_include_dash_manifest': True,
1414 'format': '141/bestaudio[ext=m4a]',
1415 },
1416 },
65c2fde2 1417 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1418 {
65c2fde2 1419 'note': 'Embed allowed age-gate video',
2d3d2997 1420 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1421 'info_dict': {
1422 'id': 'HtVdAasjOgU',
1423 'ext': 'mp4',
1424 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1425 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1426 'duration': 142,
c522adb1 1427 'upload_date': '20140605',
34952f09 1428 'age_limit': 18,
976ae3ea 1429 'categories': ['Gaming'],
1430 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1431 'availability': 'needs_auth',
1432 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1433 'like_count': int,
1434 'channel': 'The Witcher',
1435 'live_status': 'not_live',
1436 'tags': 'count:17',
1437 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1438 'playable_in_embed': True,
1439 'view_count': int,
7666b936 1440 'channel_follower_count': int,
1441 'uploader': 'The Witcher',
1442 'uploader_url': 'https://www.youtube.com/@thewitcher',
1443 'uploader_id': '@thewitcher',
14a14335 1444 'comment_count': int,
8213ce28 1445 'channel_is_verified': True,
14a14335 1446 'heatmap': 'count:100',
c522adb1
JMF
1447 },
1448 },
65c2fde2 1449 {
1450 'note': 'Age-gate video with embed allowed in public site',
1451 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1452 'info_dict': {
1453 'id': 'HsUATh_Nc2U',
1454 'ext': 'mp4',
1455 'title': 'Godzilla 2 (Official Video)',
1456 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1457 'upload_date': '20200408',
65c2fde2 1458 'age_limit': 18,
976ae3ea 1459 'availability': 'needs_auth',
1460 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
976ae3ea 1461 'channel': 'FlyingKitty',
1462 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1463 'view_count': int,
1464 'categories': ['Entertainment'],
1465 'live_status': 'not_live',
1466 'tags': ['Flyingkitty', 'godzilla 2'],
1467 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1468 'like_count': int,
1469 'duration': 177,
1470 'playable_in_embed': True,
7666b936 1471 'channel_follower_count': int,
1472 'uploader': 'FlyingKitty',
1473 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1474 'uploader_id': '@FlyingKitty900',
5caf30db 1475 'comment_count': int,
8213ce28 1476 'channel_is_verified': True,
65c2fde2 1477 },
1478 },
1479 {
1480 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1481 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1482 'info_dict': {
1483 'id': 'Tq92D6wQ1mg',
1484 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1485 'ext': 'mp4',
17322130 1486 'upload_date': '20191228',
65c2fde2 1487 'description': 'md5:17eccca93a786d51bc67646756894066',
1488 'age_limit': 18,
976ae3ea 1489 'like_count': int,
1490 'availability': 'needs_auth',
976ae3ea 1491 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1492 'view_count': int,
1493 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1494 'channel': 'Projekt Melody',
1495 'live_status': 'not_live',
1496 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1497 'playable_in_embed': True,
1498 'categories': ['Entertainment'],
1499 'duration': 106,
1500 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1501 'comment_count': int,
7666b936 1502 'channel_follower_count': int,
1503 'uploader': 'Projekt Melody',
1504 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1505 'uploader_id': '@ProjektMelody',
65c2fde2 1506 },
1507 },
1508 {
1509 'note': 'Non-Agegated non-embeddable video',
1510 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1511 'info_dict': {
1512 'id': 'MeJVWBSsPAY',
1513 'ext': 'mp4',
1514 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
65c2fde2 1515 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1516 'upload_date': '20130730',
976ae3ea 1517 'track': 'Such mich find mich',
1518 'age_limit': 0,
1519 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1520 'like_count': int,
1521 'playable_in_embed': False,
1522 'creator': 'OOMPH!',
1523 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1524 'view_count': int,
1525 'alt_title': 'Such mich find mich',
1526 'duration': 210,
1527 'channel': 'Herr Lurik',
1528 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1529 'categories': ['Music'],
1530 'availability': 'public',
976ae3ea 1531 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1532 'live_status': 'not_live',
1533 'artist': 'OOMPH!',
7666b936 1534 'channel_follower_count': int,
1535 'uploader': 'Herr Lurik',
1536 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1537 'uploader_id': '@HerrLurik',
65c2fde2 1538 },
1539 },
1540 {
1541 'note': 'Non-bypassable age-gated video',
1542 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1543 'only_matching': True,
1544 },
8bdd16b4 1545 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1546 # YouTube Red ad is not captured for creator
1547 {
1548 'url': '__2ABJjxzNo',
1549 'info_dict': {
1550 'id': '__2ABJjxzNo',
1551 'ext': 'mp4',
1552 'duration': 266,
1553 'upload_date': '20100430',
545cc85d 1554 'creator': 'deadmau5',
1555 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1556 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1557 'alt_title': 'Some Chords',
976ae3ea 1558 'availability': 'public',
1559 'tags': 'count:14',
1560 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1561 'view_count': int,
1562 'live_status': 'not_live',
1563 'channel': 'deadmau5',
1564 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1565 'like_count': int,
1566 'track': 'Some Chords',
1567 'artist': 'deadmau5',
1568 'playable_in_embed': True,
1569 'age_limit': 0,
1570 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1571 'categories': ['Music'],
1572 'album': 'Some Chords',
7666b936 1573 'channel_follower_count': int,
1574 'uploader': 'deadmau5',
1575 'uploader_url': 'https://www.youtube.com/@deadmau5',
1576 'uploader_id': '@deadmau5',
8bdd16b4 1577 },
1578 'expected_warnings': [
1579 'DASH manifest missing',
1580 ]
1581 },
067aa17e 1582 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1583 {
1584 'url': 'lqQg6PlCWgI',
1585 'info_dict': {
1586 'id': 'lqQg6PlCWgI',
1587 'ext': 'mp4',
556dbe7f 1588 'duration': 6085,
90227264 1589 'upload_date': '20150827',
12a1b225 1590 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
cbe2bd91 1591 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1592 'like_count': int,
1593 'release_timestamp': 1343767800,
1594 'playable_in_embed': True,
1595 'categories': ['Sports'],
1596 'release_date': '20120731',
1597 'channel': 'Olympics',
1598 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1599 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1600 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1601 'age_limit': 0,
1602 'availability': 'public',
1603 'live_status': 'was_live',
1604 'view_count': int,
1605 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
7666b936 1606 'channel_follower_count': int,
1607 'uploader': 'Olympics',
1608 'uploader_url': 'https://www.youtube.com/@Olympics',
1609 'uploader_id': '@Olympics',
8213ce28 1610 'channel_is_verified': True,
cbe2bd91
PH
1611 },
1612 'params': {
1613 'skip_download': 'requires avconv',
e52a40ab 1614 }
cbe2bd91 1615 },
6271f1ca
PH
1616 # Non-square pixels
1617 {
1618 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1619 'info_dict': {
1620 'id': '_b-2C3KPAM0',
1621 'ext': 'mp4',
1622 'stretched_ratio': 16 / 9.,
556dbe7f 1623 'duration': 85,
6271f1ca 1624 'upload_date': '20110310',
6271f1ca 1625 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
6271f1ca 1626 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1627 'playable_in_embed': True,
1628 'channel': '孫ᄋᄅ',
1629 'age_limit': 0,
1630 'tags': 'count:11',
1631 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1632 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1633 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1634 'view_count': int,
1635 'categories': ['People & Blogs'],
1636 'like_count': int,
1637 'live_status': 'not_live',
1638 'availability': 'unlisted',
12a1b225 1639 'comment_count': int,
7666b936 1640 'channel_follower_count': int,
1641 'uploader': '孫ᄋᄅ',
1642 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1643 'uploader_id': '@AllenMeow',
6271f1ca 1644 },
06b491eb
S
1645 },
1646 # url_encoded_fmt_stream_map is empty string
1647 {
1648 'url': 'qEJwOuvDf7I',
1649 'info_dict': {
1650 'id': 'qEJwOuvDf7I',
f57b7835 1651 'ext': 'webm',
06b491eb
S
1652 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1653 'description': '',
1654 'upload_date': '20150404',
06b491eb
S
1655 },
1656 'params': {
1657 'skip_download': 'requires avconv',
e323cf3f
S
1658 },
1659 'skip': 'This live event has ended.',
06b491eb 1660 },
067aa17e 1661 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1662 {
1663 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1664 'info_dict': {
1665 'id': 'FIl7x6_3R5Y',
eb6793ba 1666 'ext': 'webm',
da77d856
S
1667 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1668 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1669 'duration': 220,
da77d856 1670 'upload_date': '20150625',
eb6793ba 1671 'formats': 'mincount:31',
da77d856 1672 },
eb6793ba 1673 'skip': 'not actual anymore',
2ee8f5d8 1674 },
8a1a26ce
YCH
1675 # DASH manifest with segment_list
1676 {
1677 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1678 'md5': '8ce563a1d667b599d21064e982ab9e31',
1679 'info_dict': {
1680 'id': 'CsmdDsKjzN8',
1681 'ext': 'mp4',
17ee98e1 1682 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce 1683 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
8a1a26ce
YCH
1684 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1685 },
1686 'params': {
1687 'youtube_include_dash_manifest': True,
1688 'format': '135', # bestvideo
be49068d
S
1689 },
1690 'skip': 'This live event has ended.',
2ee8f5d8 1691 },
cf7e015f 1692 {
6368e2e6 1693 # Multifeed videos (multiple cameras), URL can be of any camera
7666b936 1694 # TODO: fix multifeed titles
6368e2e6 1695 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1696 'info_dict': {
6368e2e6 1697 'id': 'zaPI8MvL8pg',
1698 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1699 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1700 },
1701 'playlist': [{
1702 'info_dict': {
6368e2e6 1703 'id': 'j5yGuxZ8lLU',
cf7e015f 1704 'ext': 'mp4',
6368e2e6 1705 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
6368e2e6 1706 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1707 'duration': 10120,
1708 'channel_follower_count': int,
1709 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1710 'availability': 'public',
1711 'playable_in_embed': True,
1712 'upload_date': '20131105',
6368e2e6 1713 'categories': ['Gaming'],
1714 'live_status': 'was_live',
1715 'tags': 'count:24',
1716 'release_timestamp': 1383701910,
1717 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1718 'comment_count': int,
1719 'age_limit': 0,
1720 'like_count': int,
1721 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1722 'channel': 'WiiLikeToPlay',
1723 'view_count': int,
1724 'release_date': '20131106',
7666b936 1725 'uploader': 'WiiLikeToPlay',
1726 'uploader_id': '@WLTP',
1727 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1728 },
1729 }, {
1730 'info_dict': {
6368e2e6 1731 'id': 'zaPI8MvL8pg',
cf7e015f 1732 'ext': 'mp4',
6368e2e6 1733 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
6368e2e6 1734 'availability': 'public',
1735 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1736 'channel': 'WiiLikeToPlay',
6368e2e6 1737 'channel_follower_count': int,
1738 'description': 'md5:563ccbc698b39298481ca3c571169519',
1739 'duration': 10108,
1740 'age_limit': 0,
1741 'like_count': int,
1742 'tags': 'count:24',
1743 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
6368e2e6 1744 'release_timestamp': 1383701915,
1745 'comment_count': int,
1746 'upload_date': '20131105',
1747 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1748 'release_date': '20131106',
1749 'playable_in_embed': True,
1750 'live_status': 'was_live',
1751 'categories': ['Gaming'],
1752 'view_count': int,
7666b936 1753 'uploader': 'WiiLikeToPlay',
1754 'uploader_id': '@WLTP',
1755 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1756 },
1757 }, {
1758 'info_dict': {
6368e2e6 1759 'id': 'R7r3vfO7Hao',
cf7e015f 1760 'ext': 'mp4',
6368e2e6 1761 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1762 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1763 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1764 'like_count': int,
1765 'availability': 'public',
1766 'playable_in_embed': True,
1767 'upload_date': '20131105',
1768 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1769 'channel_follower_count': int,
1770 'tags': 'count:24',
1771 'release_date': '20131106',
6368e2e6 1772 'comment_count': int,
1773 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1774 'channel': 'WiiLikeToPlay',
1775 'categories': ['Gaming'],
1776 'release_timestamp': 1383701914,
1777 'live_status': 'was_live',
1778 'age_limit': 0,
1779 'duration': 10128,
1780 'view_count': int,
7666b936 1781 'uploader': 'WiiLikeToPlay',
1782 'uploader_id': '@WLTP',
1783 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1784 },
1785 }],
6368e2e6 1786 'params': {'skip_download': True},
cbaed4bb 1787 },
f9f49d87 1788 {
067aa17e 1789 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1790 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1791 'info_dict': {
1792 'id': 'gVfLd0zydlo',
1793 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1794 },
1795 'playlist_count': 2,
be49068d 1796 'skip': 'Not multifeed anymore',
f9f49d87 1797 },
cbaed4bb 1798 {
2d3d2997 1799 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1800 'only_matching': True,
0e49d9a6 1801 },
6d4fc66b 1802 {
2d3d2997 1803 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1804 'only_matching': True,
1805 },
0e49d9a6 1806 {
067aa17e 1807 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1808 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1809 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1810 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1811 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1812 'info_dict': {
1813 'id': 'lsguqyKfVQg',
1814 'ext': 'mp4',
1815 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1816 'alt_title': 'Dark Walk',
0e49d9a6 1817 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1818 'duration': 133,
0e49d9a6 1819 'upload_date': '20151119',
11f9be09 1820 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1821 'track': 'Dark Walk',
1822 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1823 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1824 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1825 'categories': ['Film & Animation'],
1826 'view_count': int,
1827 'live_status': 'not_live',
1828 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1829 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1830 'tags': 'count:13',
1831 'availability': 'public',
1832 'channel': 'IronSoulElf',
1833 'playable_in_embed': True,
1834 'like_count': int,
1835 'age_limit': 0,
6c73052c 1836 'channel_follower_count': int
0e49d9a6
LL
1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
61f92af1 1842 {
067aa17e 1843 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1844 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1845 'only_matching': True,
1846 },
313dfc45
LL
1847 {
1848 # Video with yt:stretch=17:0
1849 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1850 'info_dict': {
1851 'id': 'Q39EVAstoRM',
1852 'ext': 'mp4',
1853 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1854 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1855 'upload_date': '20151107',
313dfc45
LL
1856 },
1857 'params': {
1858 'skip_download': True,
1859 },
be49068d 1860 'skip': 'This video does not exist.',
313dfc45 1861 },
201c1459 1862 {
1863 # Video with incomplete 'yt:stretch=16:'
1864 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1865 'only_matching': True,
1866 },
7caf9830
S
1867 {
1868 # Video licensed under Creative Commons
1869 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1870 'info_dict': {
1871 'id': 'M4gD1WSo5mA',
1872 'ext': 'mp4',
1873 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1874 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1875 'duration': 721,
17322130 1876 'upload_date': '20150128',
7caf9830 1877 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1878 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1879 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1880 'like_count': int,
1881 'age_limit': 0,
1882 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1883 'channel': 'The Berkman Klein Center for Internet & Society',
1884 'availability': 'public',
1885 'view_count': int,
1886 'categories': ['Education'],
1887 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1888 'live_status': 'not_live',
1889 'playable_in_embed': True,
d5d1df8a 1890 'channel_follower_count': int,
1891 'chapters': list,
7666b936 1892 'uploader': 'The Berkman Klein Center for Internet & Society',
1893 'uploader_id': '@BKCHarvard',
1894 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
7caf9830
S
1895 },
1896 'params': {
1897 'skip_download': True,
1898 },
1899 },
fd050249 1900 {
fd050249
S
1901 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1902 'info_dict': {
1903 'id': 'eQcmzGIKrzg',
1904 'ext': 'mp4',
1905 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1906 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1907 'duration': 4060,
17322130 1908 'upload_date': '20151120',
fd050249 1909 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1910 'playable_in_embed': True,
1911 'tags': 'count:12',
1912 'like_count': int,
1913 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1914 'age_limit': 0,
1915 'availability': 'public',
1916 'categories': ['News & Politics'],
1917 'channel': 'Bernie Sanders',
1918 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1919 'view_count': int,
1920 'live_status': 'not_live',
1921 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1922 'comment_count': int,
d5d1df8a 1923 'channel_follower_count': int,
1924 'chapters': list,
7666b936 1925 'uploader': 'Bernie Sanders',
1926 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1927 'uploader_id': '@BernieSanders',
8213ce28 1928 'channel_is_verified': True,
14a14335 1929 'heatmap': 'count:100',
fd050249
S
1930 },
1931 'params': {
1932 'skip_download': True,
1933 },
1934 },
040ac686
S
1935 {
1936 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1937 'only_matching': True,
7f29cf54
S
1938 },
1939 {
067aa17e 1940 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1941 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1942 'only_matching': True,
6496ccb4
S
1943 },
1944 {
1945 # Rental video preview
1946 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1947 'info_dict': {
1948 'id': 'uGpuVWrhIzE',
1949 'ext': 'mp4',
1950 'title': 'Piku - Trailer',
1951 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1952 'upload_date': '20150811',
6496ccb4
S
1953 'license': 'Standard YouTube License',
1954 },
1955 'params': {
1956 'skip_download': True,
1957 },
eb6793ba 1958 'skip': 'This video is not available.',
022a5d66 1959 },
12afdc2a
S
1960 {
1961 # YouTube Red video with episode data
1962 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1963 'info_dict': {
1964 'id': 'iqKdEhx-dD4',
1965 'ext': 'mp4',
1966 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1967 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1968 'duration': 2085,
12afdc2a 1969 'upload_date': '20170118',
12afdc2a
S
1970 'series': 'Mind Field',
1971 'season_number': 1,
1972 'episode_number': 1,
976ae3ea 1973 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1974 'tags': 'count:12',
1975 'view_count': int,
1976 'availability': 'public',
1977 'age_limit': 0,
1978 'channel': 'Vsauce',
1979 'episode': 'Episode 1',
1980 'categories': ['Entertainment'],
1981 'season': 'Season 1',
1982 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1983 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1984 'like_count': int,
1985 'playable_in_embed': True,
1986 'live_status': 'not_live',
7666b936 1987 'channel_follower_count': int,
1988 'uploader': 'Vsauce',
1989 'uploader_url': 'https://www.youtube.com/@Vsauce',
1990 'uploader_id': '@Vsauce',
14a14335 1991 'comment_count': int,
8213ce28 1992 'channel_is_verified': True,
12afdc2a
S
1993 },
1994 'params': {
1995 'skip_download': True,
1996 },
1997 'expected_warnings': [
1998 'Skipping DASH manifest',
1999 ],
2000 },
c7121fa7
S
2001 {
2002 # The following content has been identified by the YouTube community
2003 # as inappropriate or offensive to some audiences.
2004 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2005 'info_dict': {
2006 'id': '6SJNVb0GnPI',
2007 'ext': 'mp4',
2008 'title': 'Race Differences in Intelligence',
2009 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2010 'duration': 965,
2011 'upload_date': '20140124',
c7121fa7
S
2012 },
2013 'params': {
2014 'skip_download': True,
2015 },
545cc85d 2016 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 2017 },
022a5d66
S
2018 {
2019 # itag 212
2020 'url': '1t24XAntNCY',
2021 'only_matching': True,
fd5c4aab
S
2022 },
2023 {
2024 # geo restricted to JP
2025 'url': 'sJL6WA-aGkQ',
2026 'only_matching': True,
2027 },
cd5a74a2
S
2028 {
2029 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2030 'only_matching': True,
2031 },
bc2ca1bb 2032 {
2033 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2034 'only_matching': True,
2035 },
2036 {
2037 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2038 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2039 'only_matching': True,
2040 },
825cd268
RA
2041 {
2042 # DRM protected
2043 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2044 'only_matching': True,
4fe54c12
S
2045 },
2046 {
2047 # Video with unsupported adaptive stream type formats
2048 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2049 'info_dict': {
2050 'id': 'Z4Vy8R84T1U',
2051 'ext': 'mp4',
2052 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2053 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2054 'duration': 433,
2055 'upload_date': '20130923',
4fe54c12
S
2056 'formats': 'maxcount:10',
2057 },
2058 'params': {
2059 'skip_download': True,
2060 'youtube_include_dash_manifest': False,
2061 },
5429d6a9 2062 'skip': 'not actual anymore',
5caabd3c 2063 },
2064 {
822b9d9c 2065 # YouTube Music Auto-generated description
7666b936 2066 # TODO: fix metadata extraction
5caabd3c 2067 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2068 'info_dict': {
2069 'id': 'MgNrAu2pzNs',
2070 'ext': 'mp4',
2071 'title': 'Voyeur Girl',
2072 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2073 'upload_date': '20190312',
104a7b5a
L
2074 'artists': ['Stephen'],
2075 'creators': ['Stephen'],
5caabd3c 2076 'track': 'Voyeur Girl',
2077 'album': 'it\'s too much love to know my dear',
2078 'release_date': '20190313',
976ae3ea 2079 'alt_title': 'Voyeur Girl',
2080 'view_count': int,
976ae3ea 2081 'playable_in_embed': True,
2082 'like_count': int,
2083 'categories': ['Music'],
2084 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
7666b936 2085 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2086 'uploader': 'Stephen',
976ae3ea 2087 'availability': 'public',
976ae3ea 2088 'duration': 169,
2089 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2090 'age_limit': 0,
2091 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2092 'tags': 'count:11',
2093 'live_status': 'not_live',
6c73052c 2094 'channel_follower_count': int
5caabd3c 2095 },
2096 'params': {
2097 'skip_download': True,
2098 },
2099 },
66b48727
RA
2100 {
2101 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2102 'only_matching': True,
2103 },
011e75e6
S
2104 {
2105 # invalid -> valid video id redirection
2106 'url': 'DJztXj2GPfl',
2107 'info_dict': {
2108 'id': 'DJztXj2GPfk',
2109 'ext': 'mp4',
2110 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2111 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2112 'upload_date': '20090125',
011e75e6
S
2113 'artist': 'Panjabi MC',
2114 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2115 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2116 },
2117 'params': {
2118 'skip_download': True,
2119 },
545cc85d 2120 'skip': 'Video unavailable',
ea74e00b
DP
2121 },
2122 {
2123 # empty description results in an empty string
2124 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2125 'info_dict': {
2126 'id': 'x41yOUIvK2k',
2127 'ext': 'mp4',
2128 'title': 'IMG 3456',
2129 'description': '',
2130 'upload_date': '20170613',
976ae3ea 2131 'view_count': int,
2132 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
976ae3ea 2133 'like_count': int,
2134 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2135 'tags': [],
2136 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2137 'availability': 'public',
2138 'age_limit': 0,
2139 'categories': ['Pets & Animals'],
2140 'duration': 7,
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
7666b936 2143 'channel': 'l\'Or Vert asbl',
2144 'channel_follower_count': int,
2145 'uploader': 'l\'Or Vert asbl',
2146 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2147 'uploader_id': '@ElevageOrVert',
ea74e00b
DP
2148 },
2149 'params': {
2150 'skip_download': True,
2151 },
2152 },
a0566bbf 2153 {
29f7c58a 2154 # with '};' inside yt initial data (see [1])
2155 # see [2] for an example with '};' inside ytInitialPlayerResponse
2156 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2157 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2158 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2159 'info_dict': {
2160 'id': 'CHqg6qOn4no',
2161 'ext': 'mp4',
2162 'title': 'Part 77 Sort a list of simple types in c#',
2163 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2164 'upload_date': '20130831',
976ae3ea 2165 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2166 'like_count': int,
976ae3ea 2167 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2168 'live_status': 'not_live',
2169 'categories': ['Education'],
2170 'availability': 'public',
2171 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2172 'tags': 'count:12',
2173 'playable_in_embed': True,
2174 'age_limit': 0,
2175 'view_count': int,
2176 'duration': 522,
2177 'channel': 'kudvenkat',
12a1b225 2178 'comment_count': int,
d5d1df8a 2179 'channel_follower_count': int,
2180 'chapters': list,
7666b936 2181 'uploader': 'kudvenkat',
2182 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2183 'uploader_id': '@Csharp-video-tutorialsBlogspot',
8213ce28 2184 'channel_is_verified': True,
14a14335 2185 'heatmap': 'count:100',
a0566bbf 2186 },
2187 'params': {
2188 'skip_download': True,
2189 },
2190 },
29f7c58a 2191 {
2192 # another example of '};' in ytInitialData
2193 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2194 'only_matching': True,
2195 },
2196 {
2197 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2198 'only_matching': True,
2199 },
545cc85d 2200 {
cc2db878 2201 # https://github.com/ytdl-org/youtube-dl/pull/28094
2202 'url': 'OtqTfy26tG0',
2203 'info_dict': {
2204 'id': 'OtqTfy26tG0',
2205 'ext': 'mp4',
2206 'title': 'Burn Out',
2207 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2208 'upload_date': '20141120',
cc2db878 2209 'artist': 'The Cinematic Orchestra',
2210 'track': 'Burn Out',
2211 'album': 'Every Day',
976ae3ea 2212 'like_count': int,
2213 'live_status': 'not_live',
2214 'alt_title': 'Burn Out',
2215 'duration': 614,
2216 'age_limit': 0,
2217 'view_count': int,
2218 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2219 'creator': 'The Cinematic Orchestra',
2220 'channel': 'The Cinematic Orchestra',
2221 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2222 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2223 'availability': 'public',
2224 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2225 'categories': ['Music'],
2226 'playable_in_embed': True,
7666b936 2227 'channel_follower_count': int,
2228 'uploader': 'The Cinematic Orchestra',
2229 'comment_count': int,
cc2db878 2230 },
2231 'params': {
2232 'skip_download': True,
2233 },
545cc85d 2234 },
bc2ca1bb 2235 {
2236 # controversial video, only works with bpctr when authenticated with cookies
2237 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2238 'only_matching': True,
2239 },
a1a7907b 2240 {
2241 # controversial video, requires bpctr/contentCheckOk
2242 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2243 'info_dict': {
2244 'id': 'SZJvDhaSDnc',
2245 'ext': 'mp4',
2246 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2247 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
a1a7907b 2248 'upload_date': '20140716',
976ae3ea 2249 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2250 'duration': 170,
2251 'categories': ['News & Politics'],
976ae3ea 2252 'view_count': int,
2253 'channel': 'CBS Mornings',
2254 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2255 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2256 'age_limit': 18,
2257 'availability': 'needs_auth',
2258 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2259 'like_count': int,
2260 'live_status': 'not_live',
2261 'playable_in_embed': True,
7666b936 2262 'channel_follower_count': int,
2263 'uploader': 'CBS Mornings',
2264 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2265 'uploader_id': '@CBSMornings',
14a14335 2266 'comment_count': int,
8213ce28 2267 'channel_is_verified': True,
a1a7907b 2268 }
2269 },
f7ad7160 2270 {
2271 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2272 'url': 'cBvYw8_A0vQ',
2273 'info_dict': {
2274 'id': 'cBvYw8_A0vQ',
2275 'ext': 'mp4',
2276 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2277 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2278 'upload_date': '20201120',
976ae3ea 2279 'duration': 1456,
2280 'categories': ['Travel & Events'],
2281 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2282 'view_count': int,
2283 'channel': 'Walk around Japan',
2284 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2285 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2286 'age_limit': 0,
2287 'availability': 'public',
2288 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2289 'live_status': 'not_live',
2290 'playable_in_embed': True,
7666b936 2291 'channel_follower_count': int,
2292 'uploader': 'Walk around Japan',
2293 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2294 'uploader_id': '@walkaroundjapan7124',
f7ad7160 2295 },
2296 'params': {
2297 'skip_download': True,
2298 },
0fb983f6 2299 }, {
2300 # Has multiple audio streams
2301 'url': 'WaOKSUlf4TM',
2302 'only_matching': True
9297939e 2303 }, {
2304 # Requires Premium: has format 141 when requested using YTM url
2305 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2306 'only_matching': True
2307 }, {
120916da 2308 # multiple subtitles with same lang_code
2309 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2310 'only_matching': True,
109dd3b2 2311 }, {
2312 # Force use android client fallback
2313 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2314 'info_dict': {
2315 'id': 'YOelRv7fMxY',
11f9be09 2316 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2317 'ext': '3gp',
2318 'upload_date': '20210624',
2319 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
109dd3b2 2320 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2321 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2322 'duration': 596,
2323 'categories': ['Entertainment'],
976ae3ea 2324 'view_count': int,
2325 'channel': 'colinfurze',
2326 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2327 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2328 'age_limit': 0,
2329 'availability': 'public',
2330 'like_count': int,
2331 'live_status': 'not_live',
2332 'playable_in_embed': True,
d5d1df8a 2333 'channel_follower_count': int,
2334 'chapters': list,
7666b936 2335 'uploader': 'colinfurze',
2336 'uploader_url': 'https://www.youtube.com/@colinfurze',
2337 'uploader_id': '@colinfurze',
14a14335 2338 'comment_count': int,
8213ce28 2339 'channel_is_verified': True,
14a14335 2340 'heatmap': 'count:100',
109dd3b2 2341 },
2342 'params': {
2343 'format': '17', # 3gp format available on android
2344 'extractor_args': {'youtube': {'player_client': ['android']}},
2345 },
120916da 2346 },
109dd3b2 2347 {
2348 # Skip download of additional client configs (remix client config in this case)
2349 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2350 'only_matching': True,
2351 'params': {
2352 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2353 },
8fc54b12 2354 }, {
2355 # shorts
2356 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2357 'only_matching': True,
9222c381 2358 }, {
2359 'note': 'Storyboards',
2360 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2361 'info_dict': {
2362 'id': '5KLPxDtMqe8',
2363 'ext': 'mhtml',
2364 'format_id': 'sb0',
2365 'title': 'Your Brain is Plastic',
9222c381 2366 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2367 'upload_date': '20140324',
976ae3ea 2368 'like_count': int,
2369 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2370 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2371 'view_count': int,
2372 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2373 'playable_in_embed': True,
2374 'tags': 'count:12',
976ae3ea 2375 'availability': 'public',
2376 'channel': 'SciShow',
2377 'live_status': 'not_live',
2378 'duration': 248,
2379 'categories': ['Education'],
2380 'age_limit': 0,
d5d1df8a 2381 'channel_follower_count': int,
2382 'chapters': list,
7666b936 2383 'uploader': 'SciShow',
2384 'uploader_url': 'https://www.youtube.com/@SciShow',
2385 'uploader_id': '@SciShow',
14a14335 2386 'comment_count': int,
8213ce28 2387 'channel_is_verified': True,
14a14335 2388 'heatmap': 'count:100',
9222c381 2389 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2390 }, {
2391 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2392 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2393 'info_dict': {
2394 'id': '2NUZ8W2llS4',
2395 'ext': 'mp4',
2396 'title': 'The NP that test your phone performance 🙂',
2397 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
992f9a73 2398 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2399 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2400 'duration': 21,
2401 'view_count': int,
2402 'age_limit': 0,
2403 'categories': ['Gaming'],
2404 'tags': 'count:23',
2405 'playable_in_embed': True,
2406 'live_status': 'not_live',
2407 'upload_date': '20220103',
2408 'like_count': int,
2409 'availability': 'public',
2410 'channel': 'Leon Nguyen',
2411 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2412 'comment_count': int,
7666b936 2413 'channel_follower_count': int,
2414 'uploader': 'Leon Nguyen',
2415 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2416 'uploader_id': '@LeonNguyen',
14a14335 2417 'heatmap': 'count:100',
992f9a73 2418 }
1ff88b7a 2419 }, {
2420 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2421 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2422 'info_dict': {
2423 'id': '2NUZ8W2llS4',
2424 'ext': 'mp4',
2425 'title': 'The NP that test your phone performance 🙂',
2426 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
1ff88b7a 2427 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2428 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2429 'duration': 21,
2430 'view_count': int,
2431 'age_limit': 0,
2432 'categories': ['Gaming'],
2433 'tags': 'count:23',
2434 'playable_in_embed': True,
2435 'live_status': 'not_live',
2436 'upload_date': '20220102',
2437 'like_count': int,
2438 'availability': 'public',
2439 'channel': 'Leon Nguyen',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2441 'comment_count': int,
7666b936 2442 'channel_follower_count': int,
2443 'uploader': 'Leon Nguyen',
2444 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2445 'uploader_id': '@LeonNguyen',
14a14335 2446 'heatmap': 'count:100',
1ff88b7a 2447 },
2448 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2449 }, {
2450 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2451 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2452 'info_dict': {
2453 'id': 'mzZzzBU6lrM',
2454 'ext': 'mp4',
2455 'title': 'I Met GeorgeNotFound In Real Life...',
7666b936 2456 'description': 'md5:978296ec9783a031738b684d4ebf302d',
992f9a73 2457 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2458 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2459 'duration': 955,
2460 'view_count': int,
2461 'age_limit': 0,
2462 'categories': ['Entertainment'],
2463 'tags': 'count:26',
2464 'playable_in_embed': True,
2465 'live_status': 'not_live',
2466 'release_timestamp': 1641172509,
2467 'release_date': '20220103',
2468 'upload_date': '20220103',
2469 'like_count': int,
2470 'availability': 'public',
2471 'channel': 'Quackity',
2472 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
7666b936 2473 'channel_follower_count': int,
2474 'uploader': 'Quackity',
2475 'uploader_id': '@Quackity',
2476 'uploader_url': 'https://www.youtube.com/@Quackity',
14a14335 2477 'comment_count': int,
8213ce28 2478 'channel_is_verified': True,
14a14335 2479 'heatmap': 'count:100',
992f9a73 2480 }
2481 },
2482 { # continuous livestream. Microformat upload date should be preferred.
2483 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2484 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2485 'info_dict': {
2486 'id': 'kgx4WGK0oNU',
2487 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2488 'ext': 'mp4',
2489 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2490 'availability': 'public',
2491 'age_limit': 0,
2492 'release_timestamp': 1637975704,
2493 'upload_date': '20210619',
2494 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2495 'live_status': 'is_live',
2496 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
992f9a73 2497 'channel': 'Abao in Tokyo',
2498 'channel_follower_count': int,
2499 'release_date': '20211127',
2500 'tags': 'count:39',
2501 'categories': ['People & Blogs'],
2502 'like_count': int,
992f9a73 2503 'view_count': int,
2504 'playable_in_embed': True,
2505 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2506 'concurrent_view_count': int,
7666b936 2507 'uploader': 'Abao in Tokyo',
2508 'uploader_url': 'https://www.youtube.com/@abaointokyo',
2509 'uploader_id': '@abaointokyo',
992f9a73 2510 },
2511 'params': {'skip_download': True}
ee27297f 2512 }, {
2513 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2514 'info_dict': {
2515 'id': 'tjjjtzRLHvA',
2516 'ext': 'mp4',
2517 'title': 'ハッシュタグ無し };if window.ytcsi',
2518 'upload_date': '20220323',
2519 'like_count': int,
2520 'availability': 'unlisted',
7666b936 2521 'channel': 'Lesmiscore',
2522 'thumbnail': r're:^https?://.*\.jpg',
ee27297f 2523 'age_limit': 0,
ee27297f 2524 'categories': ['Music'],
6e634cbe 2525 'view_count': int,
2526 'description': '',
ee27297f 2527 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2528 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2529 'live_status': 'not_live',
2530 'playable_in_embed': True,
2531 'channel_follower_count': int,
2532 'duration': 6,
2533 'tags': [],
7666b936 2534 'uploader_id': '@lesmiscore',
2535 'uploader': 'Lesmiscore',
2536 'uploader_url': 'https://www.youtube.com/@lesmiscore',
6e634cbe 2537 }
c26f9b99 2538 }, {
2539 # Prefer primary title+description language metadata by default
2540 # Do not prefer translated description if primary is empty
2541 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2542 'info_dict': {
2543 'id': 'el3E4MbxRqQ',
2544 'ext': 'mp4',
2545 'title': 'dlp test video 2 - primary sv no desc',
2546 'description': '',
2547 'channel': 'cole-dlp-test-acc',
2548 'tags': [],
2549 'view_count': int,
2550 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2551 'like_count': int,
2552 'playable_in_embed': True,
2553 'availability': 'unlisted',
7666b936 2554 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2555 'age_limit': 0,
2556 'duration': 5,
c26f9b99 2557 'live_status': 'not_live',
2558 'upload_date': '20220908',
2559 'categories': ['People & Blogs'],
c26f9b99 2560 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
7666b936 2561 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2562 'uploader_id': '@coletdjnz',
2563 'uploader': 'cole-dlp-test-acc',
c26f9b99 2564 },
2565 'params': {'skip_download': True}
2566 }, {
2567 # Extractor argument: prefer translated title+description
2568 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2569 'info_dict': {
2570 'id': 'gHKT4uU8Zng',
2571 'ext': 'mp4',
2572 'channel': 'cole-dlp-test-acc',
2573 'tags': [],
2574 'duration': 5,
2575 'live_status': 'not_live',
2576 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2577 'upload_date': '20220728',
c26f9b99 2578 'view_count': int,
2579 'categories': ['People & Blogs'],
7666b936 2580 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2581 'title': 'dlp test video title translated (fr)',
2582 'availability': 'public',
c26f9b99 2583 'age_limit': 0,
2584 'description': 'dlp test video description translated (fr)',
2585 'playable_in_embed': True,
2586 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
7666b936 2587 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2588 'uploader_id': '@coletdjnz',
2589 'uploader': 'cole-dlp-test-acc',
c26f9b99 2590 },
2591 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2592 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2593 }, {
2594 'note': '6 channel audio',
2595 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2596 'only_matching': True,
a4894d3e 2597 }, {
2598 'note': 'Multiple HLS formats with same itag',
2599 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2600 'info_dict': {
2601 'id': 'kX3nB4PpJko',
2602 'ext': 'mp4',
2603 'categories': ['Entertainment'],
2604 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
a4894d3e 2605 'live_status': 'not_live',
2606 'duration': 937,
2607 'channel_follower_count': int,
2608 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2609 'title': 'Last To Take Hand Off Jet, Keeps It!',
2610 'channel': 'MrBeast',
2611 'playable_in_embed': True,
2612 'view_count': int,
2613 'upload_date': '20221112',
a4894d3e 2614 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2615 'age_limit': 0,
2616 'availability': 'public',
2617 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2618 'like_count': int,
2619 'tags': [],
7666b936 2620 'uploader': 'MrBeast',
2621 'uploader_url': 'https://www.youtube.com/@MrBeast',
2622 'uploader_id': '@MrBeast',
14a14335 2623 'comment_count': int,
8213ce28 2624 'channel_is_verified': True,
14a14335 2625 'heatmap': 'count:100',
a4894d3e 2626 },
2627 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2628 }, {
2629 'note': 'Audio formats with Dynamic Range Compression',
2630 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2631 'info_dict': {
2632 'id': 'Tq92D6wQ1mg',
7666b936 2633 'ext': 'webm',
9bb85699 2634 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2635 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2636 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2637 'channel_follower_count': int,
2638 'description': 'md5:17eccca93a786d51bc67646756894066',
2639 'upload_date': '20191228',
9bb85699 2640 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2641 'playable_in_embed': True,
2642 'like_count': int,
2643 'categories': ['Entertainment'],
2644 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2645 'age_limit': 18,
2646 'channel': 'Projekt Melody',
9bb85699 2647 'view_count': int,
2648 'availability': 'needs_auth',
2649 'comment_count': int,
2650 'live_status': 'not_live',
9bb85699 2651 'duration': 106,
7666b936 2652 'uploader': 'Projekt Melody',
2653 'uploader_id': '@ProjektMelody',
2654 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
9bb85699 2655 },
2656 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2657 },
2658 {
2659 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2660 'info_dict': {
2661 'id': 'qVv6vCqciTM',
2662 'ext': 'mp4',
2663 'age_limit': 0,
dad2210c 2664 'comment_count': int,
2665 'chapters': 'count:13',
2666 'upload_date': '20221223',
2667 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2668 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
dad2210c 2669 'like_count': int,
2670 'release_date': '20221223',
2671 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2672 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2673 'view_count': int,
2674 'playable_in_embed': True,
2675 'duration': 4438,
2676 'availability': 'public',
2677 'channel_follower_count': int,
2678 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2679 'categories': ['Entertainment'],
2680 'live_status': 'was_live',
2681 'release_timestamp': 1671793345,
2682 'channel': 'さなちゃんねる',
2683 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2684 'uploader': 'さなちゃんねる',
7666b936 2685 'uploader_url': 'https://www.youtube.com/@sana_natori',
2686 'uploader_id': '@sana_natori',
8213ce28 2687 'channel_is_verified': True,
14a14335 2688 'heatmap': 'count:100',
7666b936 2689 },
2690 },
2691 {
2692 # Fallbacks when webpage and web client is unavailable
2693 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2694 'info_dict': {
2695 'id': 'wSSmNUl9Snw',
2696 'ext': 'mp4',
2697 # 'categories': ['Science & Technology'],
2698 'view_count': int,
2699 'chapters': 'count:2',
2700 'channel': 'Scott Manley',
2701 'like_count': int,
2702 'age_limit': 0,
2703 # 'availability': 'public',
2704 'channel_follower_count': int,
2705 'live_status': 'not_live',
2706 'upload_date': '20170831',
2707 'duration': 682,
2708 'tags': 'count:8',
2709 'uploader_url': 'https://www.youtube.com/@scottmanley',
2710 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2711 'uploader': 'Scott Manley',
2712 'uploader_id': '@scottmanley',
2713 'title': 'The Computer Hack That Saved Apollo 14',
2714 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2715 'thumbnail': r're:^https?://.*\.webp',
2716 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2717 'playable_in_embed': True,
14a14335 2718 'comment_count': int,
8213ce28 2719 'channel_is_verified': True,
14a14335 2720 'heatmap': 'count:100',
7666b936 2721 },
2722 'params': {
2723 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
dad2210c 2724 },
2725 },
2eb88d95
PH
2726 ]
2727
# Test cases for detecting YouTube embeds on third-party webpages
_WEBPAGE_TESTS = [
    {
        # Page that embeds the video through an <object> tag
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): identical to the description hash below - confirm this is the intended file checksum
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            # Video identity
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'upload_date': '20080526',
            'duration': 195,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'categories': ['Science & Technology'],
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            # Status flags
            'age_limit': 0,
            'availability': 'public',
            'live_status': 'not_live',
            'playable_in_embed': True,
            # Counters (only the type is checked)
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'channel_follower_count': int,
            'heatmap': 'count:100',
            # Channel / uploader
            'channel': 'Christopher Sykes',
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'uploader': 'Christopher Sykes',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
        },
        'params': {'skip_download': True},
    },
]
2764
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs whose query string carries a non-empty `list` parameter are
    rejected here; everything else is decided by the parent class.
    """
    # NOTE(review): the import is deliberately kept local - presumably so the
    # method stays self-contained if its source is copied elsewhere; confirm
    # before hoisting it to module level.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2773
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Results of player-derived functions (filled through `_cached`)
    self._player_cache = {}
    # Downloaded player code, keyed by player ID (filled by `_load_player`)
    self._code_cache = {}
e0df6211 2778
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment sources to every format flagged with `is_from_start`.

    Each such format dict is modified in place: `is_live` is set, `fragments`
    becomes either a fragment generator factory (live) or a `LazyList` of
    fragments (no longer live), and the `is_from_start` marker is removed.
    Formats without the marker are left untouched.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once more than `delay` seconds have
        # passed since the last fetch, replacing the shared format list
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only one caller at a time may refresh the shared state
            with manifest_lock:
                refetch_manifest(format_id, delay)

            refreshed = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if refreshed:
                return refreshed['manifest_url'], refreshed['manifest_stream_number'], is_live
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # When the stream is not live, a snapshot of the format is handed over
        # as the "manifestless" original format; otherwise False is passed
        fragment_source = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if not is_live:
            fmt['fragments'] = LazyList(fragment_source({}))
        else:
            fmt['fragments'] = fragment_source
            fmt['protocol'] = 'http_dash_segments_generator'
        del fmt['is_from_start']
adbc4ec4 2826
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment descriptors for a live DASH format.

    @param video_id               used for the debug message only
    @param format_id              passed to `mpd_feed` to select the format
    @param live_start_time        stream start as a timestamp, or a falsy value if unknown
    @param mpd_feed               callable `(format_id, delay)` returning
                                  `(manifest_url, stream_number, is_live)` or a falsy value
    @param manifestless_orig_fmt  a format dict used instead of downloading the
                                  manifest, or a falsy value; when given, only one
                                  pass over the fragments is made
    @param ctx                    dict; `start` is read and `last_error` is popped
    @yields dicts with `url` and `fragment_count`
    """
    # Seconds between polls / oldest reachable data (432000 s == 120 hours)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below reads it as a free variable and may be
    # called (and fail) before the main loop has ever assigned it
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        @returns (should_continue, last_seq); last_seq is the unchanged outer
                 value unless a fresh one was parsed from the manifest
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        # A short delay is requested when a quick refresh is wanted
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            # A default is required here: a bare next() would leak StopIteration
            # into the enclosing generator, which Python turns into RuntimeError
            fmt_info = next(
                (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
            if not fmt_info:
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up after too many consecutive rounds without new fragments
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2937
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    `PLAYER_JS_URL` is looked up first, then `jsUrl` inside
    `WEB_PLAYER_CONTEXT_CONFIGS`. The *webpage* argument is not used.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    player_url = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', player_url) if player_url else None
109dd3b2 2945
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version found in the iframe API script.

    Returns None when the script could not be downloaded or carries no
    recognisable player version.
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    # The slashes around the version may be backslash-escaped in the script
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2955
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return the dot-joined lengths of the dot-separated parts of a signature """
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(str, part_lengths))
60064c53 2959
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching *player_url*.

    @raises ExtractorError  if none of the patterns match
    """
    # Lazy generator: patterns after the first hit are never evaluated
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 2969
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player code for *player_url*, downloading it at most once per player ID.

    Returns None if the code is not cached and the download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    # Failed downloads are not cached, so a later call tries again
    if code:
        self._code_cache[player_id] = code
    return self._code_cache.get(player_id)
109dd3b2 2980
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a function that rearranges a signature like the player does.

    The rearrangement is stored as a list of source indices under the
    `youtube-sigfuncs` cache section; on a cache miss it is derived by
    running the player's signature function on a probe string and stored.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    # The identifier must not contain any directory component
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Character k of the probe has code point k, so the output's code
            # points are the source index of each output position
            probe = ''.join(chr(idx) for idx in range(len(example_sig)))
            cache_spec = list(map(ord, sig_func(probe)))
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
83799698 3000
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    Does nothing unless the `youtube_print_sig_code` param is set. The
    function is probed with a string as long as *example_sig*, and the
    resulting permutation is written out as a sum of index/slice expressions
    guarded by a check on the lengths of the signature's dot-separated parts.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield `s[...]` expressions that together reproduce the index list."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A stop that would fall below zero has to be left open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        if not idxs:
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # With a single index the loop below never runs, so bind `i` up front
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # Fall back to an empty string literal so the generated `return` stays an expression
    expr_code = ' + '.join(gen_sig_code(cache_spec)) or "''"
    # repr() keeps the trailing comma of a one-element tuple; '(N)' would be an int
    signature_id_tuple = repr(tuple(len(p) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 3042
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a callable for it.

    The function name is found with the first matching pattern below; the
    returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    sig_func = interpreter.extract_function(sig_func_name)
    return lambda s: sig_func([s])
3063
580ce007 3064 def _cached(self, func, *cache_id):
3065 def inner(*args, **kwargs):
3066 if cache_id not in self._player_cache:
3067 try:
3068 self._player_cache[cache_id] = func(*args, **kwargs)
3069 except ExtractorError as e:
3070 self._player_cache[cache_id] = e
3071 except Exception as e:
3072 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3073
3074 ret = self._player_cache[cache_id]
3075 if isinstance(ret, Exception):
3076 raise ret
3077 return ret
3078 return inner
3079
545cc85d 3080 def _decrypt_signature(self, s, video_id, player_url):
257a2501 3081 """Turn the encrypted s field into a working signature"""
580ce007 3082 extract_sig = self._cached(
3083 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3084 func = extract_sig(video_id, player_url, s)
3085 self._print_sig_code(func, s)
3086 return func(s)
404f611f 3087
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into its decrypted value.

    The nsig function code is taken from the player at `player_url` and run
    with the native JS interpreter; if that raises JSInterpreter.Exception,
    the same code is executed through PhantomJS instead. Raises
    ExtractorError when `player_url` is None or the function code cannot be
    extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # PhantomJS is not usable; surface the interpreter failure
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
3121
90a1df30 3122 def _extract_n_function_name(self, jscode):
3123 funcname, idx = self._search_regex(
3124 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3125 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3126 if not idx:
3127 return funcname
3128
3129 return json.loads(js_to_json(self._search_regex(
337734d4 3130 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
90a1df30 3131 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3132
def _extract_n_function_code(self, video_id, player_url):
    """Return `(jsi, player_id, func_code)` for the player's nsig function.

    `func_code` is loaded from the `youtube-nsig` cache when present;
    otherwise the player JS is loaded, the function is extracted (first with
    a strict regex, then with the JS interpreter as fallback) and the result
    is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`,
    # which would otherwise act as a regex anchor
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to the (argument names, body) shape used for func_code
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3158
3159 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 3160 func = jsi.extract_function_from_code(*func_code)
f6ca640b 3161
580ce007 3162 def extract_nsig(s):
25836db6 3163 try:
3164 ret = func([s])
3165 except JSInterpreter.Exception:
3166 raise
3167 except Exception as e:
3168 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3169
f6ca640b 3170 if ret.startswith('enhanced_except_'):
25836db6 3171 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 3172 return ret
580ce007 3173
3174 return extract_nsig
e0df6211 3175
109dd3b2 3176 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3177 """
3178 Extract signatureTimestamp (sts)
3179 Required to tell API what sig/player version is in use.
3180 """
3181 sts = None
3182 if isinstance(ytcfg, dict):
3183 sts = int_or_none(ytcfg.get('STS'))
3184
3185 if not sts:
3186 # Attempt to extract from player
3187 if player_url is None:
3188 error_msg = 'Cannot extract signature timestamp without player_url.'
3189 if fatal:
3190 raise ExtractorError(error_msg)
3191 self.report_warning(error_msg)
3192 return
404f611f 3193 code = self._load_player(video_id, player_url, fatal=fatal)
3194 if code:
109dd3b2 3195 sts = int_or_none(self._search_regex(
3196 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3197 'JS player signature timestamp', group='sts', fatal=fatal))
3198 return sts
3199
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so the video is marked as watched.

    Both the playback URL and the watchtime URL from the player responses
    are requested, each with a generated `cpn` and a playback position just
    before the end. Stops with a warning at the first missing URL.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # The enumerate index doubles as the "full watch" flag for the second URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = 0
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3240
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by `webpage`.

    A canonical YouTube `<link rel="alternate">` ends extraction right after
    it is yielded; otherwise results from the parent implementation and from
    two known embed markups are produced.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    canonical = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if canonical:
        yield cls.url_result(canonical.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The last group of each match is the video id
        plugin_video_id = groups[-1]
        yield cls.url_result(plugin_video_id, cls, plugin_video_id)
66c9fa36 3264
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`; raise ExtractorError if none can be derived."""
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3271
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def chapter_start(chapter):
        # Start time is given in milliseconds; scale it down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3286
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first engagement panel that yields any, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3299
def _extract_heatmap(self, data):
    """Return heatmap markers as dicts with start_time, end_time and value, or None."""
    # Fields extracted from every marker; times are converted from milliseconds
    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    return heatmap or None
5caf30db 3309
a1c5d2ca
M
3310 def _extract_comment(self, comment_renderer, parent=None):
3311 comment_id = comment_renderer.get('commentId')
3312 if not comment_id:
3313 return
fe93e2c4 3314
c35448b7 3315 info = {
3316 'id': comment_id,
3317 'text': self._get_text(comment_renderer, 'contentText'),
3318 'like_count': self._get_count(comment_renderer, 'voteCount'),
3319 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
3320 'author': self._get_text(comment_renderer, 'authorText'),
3321 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
3322 'parent': parent or 'root',
3323 }
fe93e2c4 3324
c26f9b99 3325 # Timestamp is an estimate calculated from the current time and time_text
3326 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3327 timestamp = self._parse_time_text(time_text)
3328
c35448b7 3329 info.update({
3330 # FIXME: non-standard, but we need a way of showing that it is an estimate.
3331 '_time_text': time_text,
3332 'timestamp': timestamp,
3333 })
fe93e2c4 3334
c35448b7 3335 info['author_url'] = urljoin(
3336 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
3337 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
3338 expected_type=str, get_all=False))
a1c5d2ca 3339
c35448b7 3340 author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
3341 if author_is_uploader is not None:
3342 info['author_is_uploader'] = author_is_uploader
3343
3344 comment_abr = traverse_obj(
89bed013 3345 comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
c35448b7 3346 if comment_abr is not None:
3347 info['is_favorited'] = 'creatorHeart' in comment_abr
3348
14a14335 3349 badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
3350 if self._has_badge(badges, BadgeType.VERIFIED):
3351 info['author_is_verified'] = True
c35448b7 3352
3353 is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
3354 if is_pinned:
3355 info['is_pinned'] = True
3356
3357 return info
a1c5d2ca 3358
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts for a comment section or for one reply thread.

    Starting from the continuation found in `root_continuation_data`, the
    `next` endpoint is queried page by page and every comment produced by
    `_extract_comment` is yielded; reply threads are handled by calling this
    method recursively with `parent` set to the parent comment id.

    `tracker` carries the counters and the seen/pinned id sets across the
    recursive calls; a fresh one is created when none is passed.

    Raises `self.CommentsDisabled` when the root data carries a message and
    no top-level comment was produced.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and picks the continuation of the
        # requested sort order from the comments header
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page. A bare `yield` produces None, which
        # the consuming loop at the bottom of this method treats as "stop".
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_id = comment['id']
            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is
                # left of the overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=None,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set(),
            pinned_comment_ids=set()
        )

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The max_comments argument holds up to four values; missing or invalid
    # ones fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only provides the header and the
                # continuation for the chosen sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread signalled that extraction should stop
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3542
3543 @staticmethod
3544 def _generate_comment_continuation(video_id):
3545 """
3546 Generates initial comment section continuation token from given video id
3547 """
3548 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3549 return base64.b64encode(token.encode()).decode()
3550
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section, if any
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # First value of the max_comments argument caps the total number of comments
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
a1c5d2ca 3561
109dd3b2 3562 @staticmethod
99e9e001 3563 def _get_checkok_params():
3564 return {'contentCheckOk': True, 'racyCheckOk': True}
3565
3566 @classmethod
3567 def _generate_player_context(cls, sts=None):
109dd3b2 3568 context = {
3569 'html5Preference': 'HTML5_PREF_WANTS',
3570 }
3571 if sts is not None:
3572 context['signatureTimestamp'] = sts
3573 return {
3574 'playbackContext': {
3575 'contentPlaybackContext': context
a1a7907b 3576 },
99e9e001 3577 **cls._get_checkok_params()
109dd3b2 3578 }
3579
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of `player_response` indicates an age gate."""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3591
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of `player_response` is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3595
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for `video_id` using `client`.

    Returns the response, or None when it is empty. `smuggled_data` is
    accepted but not used here.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    base_client_name = _split_innertube_client(client)[0]
    if base_client_name in ('android', 'android_embedscreen'):
        yt_query['params'] = 'CgIIAQ=='

    # A user-supplied player_params value overrides the client default
    pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
    if pp_arg:
        yt_query['params'] = pp_arg

    yt_query.update(self._generate_player_context(sts))
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return response or None
3621
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the `player_client` extractor argument, where
    `default` and `all` expand to the default list and to every allowed
    client; unsupported names are skipped with a warning. When nothing
    valid was requested the defaults are used. For music URLs the `_music`
    variant of every selected client is appended when it exists.
    """
    requested_clients = []
    default = ['ios', 'android', 'web']
    # Client names starting with `_` are not selectable; highest priority first
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first so the list is not extended while it
        # is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3645
5eedc208
SS
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in `pr` when it differs from `video_id`, else None."""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    pr_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    if pr_id == video_id:
        return None
    return pr_id
3651
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from the given clients.

    Returns `(prs, player_url)` where `prs` is the list of accepted player
    responses. Responses that belong to a different video are skipped and
    reported. Raises ExtractorError when no player response is left.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that clients.pop() below takes them in the requested order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    player_url = None
    # Maps client name -> video id of the unexpected response it returned
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download fallback is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when there is one
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
11f9be09 3735
4d37720a
L
3736 def _needs_live_processing(self, live_status, duration):
3737 if (live_status == 'is_live' and self.get_param('live_from_start')
d949c10c 3738 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
4d37720a
L
3739 return live_status
3740
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate format dicts from the streaming data, followed by the subtitles.

    This is a generator. It first yields one dict per usable entry found under
    the `formats`/`adaptiveFormats` keys of every item in `streaming_data`, then
    the formats obtained from each item's HLS and DASH manifests, and finally
    (as the very last item) the merged subtitles dict collected from those
    manifests. Callers are expected to split the last item off, e.g.
    `*formats, subtitles = self._extract_formats_and_subtitles(...)`.

    @param streaming_data   Iterable of "streamingData" dicts
    @param video_id         Video ID, used for messages and signature decryption
    @param player_url       URL of the JS player; needed to decrypt the
                            signature and the `n` parameter. May be None
    @param live_status      Live status string of the video
    @param duration         Duration of the video in seconds, or None
    """
    CHUNK_SIZE = 10 << 20  # 10 MiB
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # Lazily split the file into CHUNK_SIZE-sized pieces addressed via the `range` query parameter
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            # The same stream may be offered by several clients; keep only the first
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be absent or explicitly null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be given a matching quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, just marked and deprioritized
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # `quality` is None when the format carries neither `quality` nor `audioQuality`
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            # The track id may be absent or explicitly null
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            # Variant of the same format that is downloaded as ranged fragments
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Adjusts the manifest format `f` in place.
        # Returns False if it duplicates an already seen (protocol, language) pair for this itag
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            # The itag is also available over another protocol; disambiguate the id
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known resolution
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item yielded
    yield subtitles
11f9be09 3992
def _extract_storyboard(self, player_responses, duration):
    """Generate one "storyboard" (mhtml) format per level of the storyboard spec.

    The spec is a `|`-separated string: the first field is the URL template
    and each following field describes one storyboard level as 8 `#`-separated
    values, of which the first five are width, height, frame count, columns
    and rows, and the last two are substituted for `$N` and appended as `sigh`.

    @param player_responses     Player responses to search for the spec
    @param duration             Duration of the video in seconds. When it is
                                missing or zero, nothing is yielded since the
                                frame rate and the fragment durations cannot
                                be computed
    """
    # Reversed so that the URL template (first field) can be popped off the end
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoid TypeError/ZeroDivisionError in the divisions below
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is reversed, so `L - i` is the level's position in the original order
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of `cols * rows` frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
4030
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The webpage download is skipped when `webpage` is listed in the
    `player_skip` extractor argument; `webpage` is then None.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        # Optional user-supplied player params, passed through case-sensitively
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            webpage_query['pp'] = player_params
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
4048
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    Returns a tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`
    where `formats` is a list and `subtitles` is the last item produced by
    `_extract_formats_and_subtitles`.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # Checked in order of precedence; the first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    has_drm_free_format = any(not fmt.get('has_drm') for fmt in formats)
    if not has_drm_free_format:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
4071
4072 def _real_extract(self, url):
4073 url, smuggled_data = unsmuggle_url(url, {})
4074 video_id = self._match_id(url)
4075
4076 base_url = self.http_scheme() + '//www.youtube.com/'
4077 webpage_url = base_url + 'watch?v=' + video_id
4078
4079 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4080
11f9be09 4081 playability_statuses = traverse_obj(
6839ae1f 4082 player_responses, (..., 'playabilityStatus'), expected_type=dict)
11f9be09 4083
4084 trailer_video_id = get_first(
4085 playability_statuses,
4086 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4087 expected_type=str)
4088 if trailer_video_id:
4089 return self.url_result(
4090 trailer_video_id, self.ie_key(), trailer_video_id)
4091
4092 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4093 if webpage else (lambda x: None))
4094
6839ae1f 4095 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
11f9be09 4096 microformats = traverse_obj(
4097 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 4098 expected_type=dict)
c26f9b99 4099
4100 translated_title = self._get_text(microformats, (..., 'title'))
4101 video_title = (self._preferred_lang and translated_title
4102 or get_first(video_details, 'title') # primary
4103 or translated_title
4104 or search_meta(['og:title', 'twitter:title', 'title']))
4105 translated_description = self._get_text(microformats, (..., 'description'))
4106 original_description = get_first(video_details, 'shortDescription')
4107 video_description = (
4108 self._preferred_lang and translated_description
4109 # If original description is blank, it will be an empty string.
4110 # Do not prefer translated description in this case.
4111 or original_description if original_description is not None else translated_description)
11f9be09 4112
d89257f3 4113 multifeed_metadata_list = get_first(
4114 player_responses,
4115 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4116 expected_type=str)
4117 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4118 if self.get_param('noplaylist'):
11f9be09 4119 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 4120 else:
4121 entries = []
4122 feed_ids = []
4123 for feed in multifeed_metadata_list.split(','):
4124 # Unquote should take place before split on comma (,) since textual
4125 # fields may contain comma as well (see
4126 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 4127 feed_data = urllib.parse.parse_qs(
ac668111 4128 urllib.parse.unquote_plus(feed))
d89257f3 4129
4130 def feed_entry(name):
4131 return try_get(
14f25df2 4132 feed_data, lambda x: x[name][0], str)
d89257f3 4133
4134 feed_id = feed_entry('id')
4135 if not feed_id:
4136 continue
4137 feed_title = feed_entry('title')
4138 title = video_title
4139 if feed_title:
4140 title += ' (%s)' % feed_title
4141 entries.append({
4142 '_type': 'url_transparent',
4143 'ie_key': 'Youtube',
4144 'url': smuggle_url(
4145 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
4146 {'force_singlefeed': True}),
4147 'title': title,
4148 })
4149 feed_ids.append(feed_id)
4150 self.to_screen(
4151 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
4152 % (', '.join(feed_ids), video_id))
4153 return self.playlist_result(
4154 entries, video_id, video_title, video_description)
11f9be09 4155
9da6612b 4156 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4157 or int_or_none(get_first(microformats, 'lengthSeconds'))
4158 or parse_duration(search_meta('duration')) or None)
a1b2d843 4159
4d37720a
L
4160 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4161 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4162 if live_status == 'post_live':
4163 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
bf1317d2 4164
545cc85d 4165 if not formats:
11f9be09 4166 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 4167 self.report_drm(video_id)
11f9be09 4168 pemr = get_first(
4169 playability_statuses,
4170 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4171 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4172 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 4173 if subreason:
545cc85d 4174 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 4175 countries = get_first(microformats, 'availableCountries')
545cc85d 4176 if not countries:
4177 regions_allowed = search_meta('regionsAllowed')
4178 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 4179 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 4180 reason += f'. {subreason}'
545cc85d 4181 if reason:
b7da73eb 4182 self.raise_no_formats(reason, expected=True)
bf1317d2 4183
11f9be09 4184 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 4185 if not keywords and webpage:
4186 keywords = [
4187 unescapeHTML(m.group('content'))
4188 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4189 for keyword in keywords:
4190 if keyword.startswith('yt:stretch='):
201c1459 4191 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4192 if mobj:
4193 # NB: float is intentional for forcing float division
4194 w, h = (float(v) for v in mobj.groups())
4195 if w > 0 and h > 0:
4196 ratio = w / h
4197 for f in formats:
4198 if f.get('vcodec') != 'none':
4199 f['stretched_ratio'] = ratio
4200 break
a709d873 4201 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 4202 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4203 if thumbnail_url:
4204 thumbnails.append({
4205 'url': thumbnail_url,
ff2751ac 4206 })
fccf5021 4207 original_thumbnails = thumbnails.copy()
4208
0ba692ac 4209 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 4210 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 4211 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 4212 thumbnail_names = [
962ffcf8 4213 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 4214 # in resolution, these are not the custom thumbnail. So de-prioritize them
4215 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4216 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 4217 ]
cca80fe6 4218 n_thumbnail_names = len(thumbnail_names)
0ba692ac 4219 thumbnails.extend({
4220 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4221 video_id=video_id, name=name, ext=ext,
4d37720a 4222 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
cca80fe6 4223 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 4224 for thumb in thumbnails:
cca80fe6 4225 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 4226 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 4227 self._remove_duplicate_formats(thumbnails)
fccf5021 4228 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 4229
7ea65411 4230 category = get_first(microformats, 'category') or search_meta('genre')
7666b936 4231 channel_id = self.ucid_or_none(str_or_none(
7ea65411 4232 get_first(video_details, 'channelId')
4233 or get_first(microformats, 'externalChannelId')
7666b936 4234 or search_meta('channelId')))
7ea65411 4235 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4236
adbc4ec4
THD
4237 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4238 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4239 if not duration and live_end_time and live_start_time:
4240 duration = live_end_time - live_start_time
4241
4d37720a
L
4242 needs_live_processing = self._needs_live_processing(live_status, duration)
4243
4244 def is_bad_format(fmt):
4245 if needs_live_processing and not fmt.get('is_from_start'):
4246 return True
4247 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4248 and fmt.get('protocol') == 'http_dash_segments'):
4249 return True
4250
4251 for fmt in filter(is_bad_format, formats):
4252 fmt['preference'] = (fmt.get('preference') or -1) - 10
d949c10c 4253 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
4d37720a
L
4254
4255 if needs_live_processing:
4256 self._prepare_live_from_start_formats(
4257 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
7ea65411 4258
720c3099 4259 formats.extend(self._extract_storyboard(player_responses, duration))
4260
7666b936 4261 channel_handle = self.handle_from_url(owner_profile_url)
4262
545cc85d 4263 info = {
4264 'id': video_id,
39ca3b5c 4265 'title': video_title,
545cc85d 4266 'formats': formats,
4267 'thumbnails': thumbnails,
fccf5021 4268 # The best thumbnail that we are sure exists. Prevents unnecessary
4269 # URL checking if user don't care about getting the best possible thumbnail
4270 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 4271 'description': video_description,
545cc85d 4272 'channel_id': channel_id,
7666b936 4273 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
545cc85d 4274 'duration': duration,
4275 'view_count': int_or_none(
11f9be09 4276 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 4277 or search_meta('interactionCount')),
11f9be09 4278 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 4279 'age_limit': 18 if (
11f9be09 4280 get_first(microformats, 'isFamilySafe') is False
545cc85d 4281 or search_meta('isFamilyFriendly') == 'false'
4282 or search_meta('og:restrictions:age') == '18+') else 0,
4283 'webpage_url': webpage_url,
4284 'categories': [category] if category else None,
4285 'tags': keywords,
11f9be09 4286 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4d37720a 4287 'live_status': live_status,
adbc4ec4 4288 'release_timestamp': live_start_time,
9f14daf2 4289 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
4290 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
545cc85d 4291 }
b477fc13 4292
c646d76f 4293 subtitles = {}
3944e7af 4294 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 4295 if pctr:
ecdc9049 4296 def get_lang_code(track):
4297 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4298 or track.get('languageCode'))
4299
4300 # Converted into dicts to remove duplicates
4301 captions = {
4302 get_lang_code(sub): sub
6839ae1f 4303 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
ecdc9049 4304 translation_languages = {
4305 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
6839ae1f 4306 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
ecdc9049 4307
774d79cc 4308 def process_language(container, base_url, lang_code, sub_name, query):
120916da 4309 lang_subs = container.setdefault(lang_code, [])
545cc85d 4310 for fmt in self._SUBTITLE_FORMATS:
4311 query.update({
4312 'fmt': fmt,
4313 })
4314 lang_subs.append({
4315 'ext': fmt,
60f393e4 4316 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 4317 'name': sub_name,
545cc85d 4318 })
7e72694b 4319
07b47084 4320 # NB: Constructing the full subtitle dictionary is slow
4321 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4322 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 4323 for lang_code, caption_track in captions.items():
4324 base_url = caption_track.get('baseUrl')
1235d333 4325 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 4326 if not base_url:
4327 continue
ecdc9049 4328 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 4329 if caption_track.get('kind') != 'asr':
545cc85d 4330 if not lang_code:
4331 continue
4332 process_language(
ecdc9049 4333 subtitles, base_url, lang_code, lang_name, {})
4334 if not caption_track.get('isTranslatable'):
4335 continue
3944e7af 4336 for trans_code, trans_name in translation_languages.items():
4337 if not trans_code:
545cc85d 4338 continue
1235d333 4339 orig_trans_code = trans_code
71eb82d1 4340 if caption_track.get('kind') != 'asr' and trans_code != 'und':
07b47084 4341 if not get_translated_subs:
18e49408 4342 continue
ecdc9049 4343 trans_code += f'-{lang_code}'
a70635b8 4344 trans_name += format_field(lang_name, None, ' from %s')
1235d333 4345 if lang_code == f'a-{orig_trans_code}':
ff9b0e07 4346 # Set audio language based on original subtitles
4347 for f in formats:
4348 if f.get('acodec') != 'none' and not f.get('language'):
4349 f['language'] = orig_trans_code
4350 # Add an "-orig" label to the original language so that it can be distinguished.
4351 # The subs are returned without "-orig" as well for compatibility
0c8d9e5f 4352 process_language(
d49669ac 4353 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4354 # Setting tlang=lang returns damaged subtitles.
d49669ac 4355 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 4356 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 4357
4358 info['automatic_captions'] = automatic_captions
4359 info['subtitles'] = subtitles
7e72694b 4360
14f25df2 4361 parsed_url = urllib.parse.urlparse(url)
545cc85d 4362 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 4363 query = urllib.parse.parse_qs(component)
545cc85d 4364 for k, v in query.items():
4365 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4366 d_k += '_time'
4367 if d_k not in info and k in s_ks:
4368 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
4369
4370 # Youtube Music Auto-generated description
71dc18fa
BT
4371 if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
4372 # XXX: Causes catastrophic backtracking if description has "·"
4373 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4374 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4375 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
1890fc63 4376 mobj = re.search(
4377 r'''(?xs)
71dc18fa
BT
4378 (?=(?P<track>[^\n·]+))(?P=track)·
4379 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4380 (?=(?P<album>[^\n]+))(?P=album)\n
1890fc63 4381 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4382 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
71dc18fa
BT
4383 (.+?\nArtist\s*:\s*
4384 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4385 )?.+\nAuto-generated\ by\ YouTube\.\s*$
1890fc63 4386 ''', video_description)
822b9d9c 4387 if mobj:
822b9d9c
RA
4388 release_year = mobj.group('release_year')
4389 release_date = mobj.group('release_date')
4390 if release_date:
4391 release_date = release_date.replace('-', '')
4392 if not release_year:
545cc85d 4393 release_year = release_date[:4]
4394 info.update({
4395 'album': mobj.group('album'.strip()),
104a7b5a
L
4396 'artists': ([a] if (a := mobj.group('clean_artist'))
4397 else [a.strip() for a in mobj.group('artist').split('·')]),
545cc85d 4398 'track': mobj.group('track').strip(),
4399 'release_date': release_date,
cc2db878 4400 'release_year': int_or_none(release_year),
545cc85d 4401 })
7e72694b 4402
545cc85d 4403 initial_data = None
4404 if webpage:
56ba69e4 4405 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
607510b9 4406 if not traverse_obj(initial_data, 'contents'):
4407 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4408 initial_data = None
545cc85d 4409 if not initial_data:
99e9e001 4410 query = {'videoId': video_id}
4411 query.update(self._get_checkok_params())
109dd3b2 4412 initial_data = self._extract_response(
4413 item_id=video_id, ep='next', fatal=False,
607510b9 4414 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
99e9e001 4415 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4416 note='Downloading initial data API JSON')
545cc85d 4417
0df111a3 4418 info['comment_count'] = traverse_obj(initial_data, (
4419 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
071670cb 4420 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
0df111a3 4421 ), (
4422 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
071670cb
ND
4423 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
4424 ), expected_type=self._get_count, get_all=False)
0df111a3 4425
19a03940 4426 try: # This will error if there is no livechat
c60ee3a2 4427 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4428 except (KeyError, IndexError, TypeError):
4429 pass
4430 else:
ecdc9049 4431 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4432 # url is needed to set cookies
4433 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4434 'video_id': video_id,
4435 'ext': 'json',
4d37720a
L
4436 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4437 else 'youtube_live_chat_replay'),
c60ee3a2 4438 }]
545cc85d 4439
4440 if initial_data:
7c365c21 4441 info['chapters'] = (
4442 self._extract_chapters_from_json(initial_data, duration)
4443 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4444 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4445 or None)
545cc85d 4446
03e85ea9 4447 info['heatmap'] = self._extract_heatmap(initial_data)
5caf30db 4448
17322130 4449 contents = traverse_obj(
4450 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4451 expected_type=list, default=[])
4452
4453 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4454 if vpir:
4455 stl = vpir.get('superTitleLink')
4456 if stl:
4457 stl = self._get_text(stl)
4458 if try_get(
4459 vpir,
4460 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4461 info['location'] = stl
4462 else:
affc4fef 4463 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4464 if mobj:
545cc85d 4465 info.update({
17322130 4466 'series': mobj.group(1),
4467 'season_number': int(mobj.group(2)),
4468 'episode_number': int(mobj.group(3)),
545cc85d 4469 })
17322130 4470 for tlb in (try_get(
4471 vpir,
4472 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4473 list) or []):
3ffb2f5b 4474 tbrs = variadic(
4475 traverse_obj(
6839ae1f
SS
4476 tlb, ('toggleButtonRenderer', ...),
4477 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
3ffb2f5b 4478 for tbr in tbrs:
4479 for getter, regex in [(
4480 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4481 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4482 lambda x: x['accessibility'],
4483 lambda x: x['accessibilityData']['accessibilityData'],
4484 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4485 label = (try_get(tbr, getter, dict) or {}).get('label')
4486 if label:
4487 mobj = re.match(regex, label)
4488 if mobj:
4489 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4490 break
6b5d93b0
PG
4491
4492 info['like_count'] = traverse_obj(vpir, (
4493 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4494 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4495 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4496 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
4497
867c66ff
M
4498 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4499 if vcr:
4500 vc = self._get_count(vcr, 'viewCount')
4501 # Upcoming premieres with waiting count are treated as live here
4502 if vcr.get('isLive'):
4503 info['concurrent_view_count'] = vc
4504 elif info.get('view_count') is None:
4505 info['view_count'] = vc
4506
17322130 4507 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4508 if vsir:
4509 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4510 info.update({
4511 'channel': self._get_text(vor, 'title'),
4512 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4513
7666b936 4514 if not channel_handle:
4515 channel_handle = self.handle_from_url(
4516 traverse_obj(vor, (
4517 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4518 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4519 {str}), get_all=False))
4520
17322130 4521 rows = try_get(
4522 vsir,
4523 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4524 list) or []
4525 multiple_songs = False
4526 for row in rows:
4527 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4528 multiple_songs = True
4529 break
4530 for row in rows:
4531 mrr = row.get('metadataRowRenderer') or {}
4532 mrr_title = mrr.get('title')
4533 if not mrr_title:
4534 continue
4535 mrr_title = self._get_text(mrr, 'title')
4536 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4537 if mrr_title == 'License':
4538 info['license'] = mrr_contents_text
4539 elif not multiple_songs:
4540 if mrr_title == 'Album':
4541 info['album'] = mrr_contents_text
4542 elif mrr_title == 'Artist':
104a7b5a 4543 info['artists'] = [mrr_contents_text] if mrr_contents_text else None
17322130 4544 elif mrr_title == 'Song':
4545 info['track'] = mrr_contents_text
8213ce28 4546 owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
4547 if self._has_badge(owner_badges, BadgeType.VERIFIED):
4548 info['channel_is_verified'] = True
545cc85d 4549
7666b936 4550 info.update({
4551 'uploader': info.get('channel'),
4552 'uploader_id': channel_handle,
4553 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4554 })
17322130 4555 # The upload date for scheduled, live and past live streams / premieres in microformats
4556 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4557 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 4558 upload_date = (
4559 unified_strdate(get_first(microformats, 'uploadDate'))
4560 or unified_strdate(search_meta('uploadDate')))
1ff88b7a 4561 if not upload_date or (
4d37720a 4562 live_status in ('not_live', None)
1ff88b7a 4563 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4564 ):
c26f9b99 4565 upload_date = strftime_or_none(
ad54c913 4566 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
17322130 4567 info['upload_date'] = upload_date
992f9a73 4568
ef79d20d 4569 if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
4570 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
4571 upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
bb5a54e6 4572 if upload_datetime >= datetime_from_str('today-2days'):
ef79d20d 4573 for fmt in info['formats']:
4574 if fmt.get('protocol') == 'm3u8_native':
4575 fmt['__needs_testing'] = True
4576
104a7b5a 4577 for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
545cc85d 4578 v = info.get(s_k)
4579 if v:
4580 info[d_k] = v
b84071c0 4581
14a14335 4582 badges = self._extract_badges(traverse_obj(vpir, 'badges'))
c26f9b99 4583
4584 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4585 or get_first(video_details, 'isPrivate', expected_type=bool))
4586
4587 info['availability'] = (
4588 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4589 else self._availability(
4590 is_private=is_private,
4591 needs_premium=(
4592 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4593 or False if initial_data and is_private is not None else None),
4594 needs_subscription=(
4595 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4596 or False if initial_data and is_private is not None else None),
4597 needs_auth=info['age_limit'] >= 18,
4598 is_unlisted=None if is_private is None else (
4599 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4600 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4601
a2160aa4 4602 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4603
11f9be09 4604 self.mark_watched(video_id, player_responses)
d77ab8e2 4605
545cc85d 4606 return info
c5e8d7af 4607
a61fd4cf 4608
a6213a49 4609class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
182bda88 4610 @staticmethod
def passthrough_smuggled_data(func):
    """Decorator for methods with the signature ``func(self, url, smuggled_data)``.

    The returned wrapper takes ``(self, url)``: it unsmuggles the URL, flags
    music URLs in the smuggled data, calls ``func`` and then smuggles the
    remaining data back into the result and into each of its entries.
    """
    rewritable_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only URL-type results carry a URL that can hold smuggled data
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in rewritable_hosts:
                    # The flag is encoded in the host itself, so drop it from the payload
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy since _smuggle may pop keys
            result['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper
4636
8bdd16b4 4637 @staticmethod
cd7c66cf 4638 def _extract_basic_item_renderer(item):
4639 # Modified from _extract_grid_item_renderer
201c1459 4640 known_basic_renderers = (
a17526e4 4641 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4642 )
4643 for key, renderer in item.items():
201c1459 4644 if not isinstance(renderer, dict):
cd7c66cf 4645 continue
201c1459 4646 elif key in known_basic_renderers:
4647 return renderer
4648 elif key.startswith('grid') and key.endswith('Renderer'):
4649 return renderer
8bdd16b4 4650
def _extract_channel_renderer(self, renderer):
    """Build a `url`-type result dict pointing at YoutubeTabIE from a channel renderer."""
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    handle_url_path = (
        'navigationEndpoint', (
            ('commandMetadata', 'webCommandMetadata', 'url'),
            ('browseEndpoint', 'canonicalBaseUrl')),
        {str})
    channel_handle = (
        self.handle_from_url(traverse_obj(renderer, handle_url_path, get_all=False))
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        or self.handle_or_none(self._get_text(renderer, 'subscriberCountText')))

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        # In search channel renderers the subscriber text is found in videoCountText
        # (see the handle fallback above); in feed/channels subscriberCountText holds it
        'channel_follower_count': traverse_obj(
            renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': (
            # videoCountText is only used when subscriberCountText parses as a count,
            # since otherwise videoCountText may be the subscriber count
            self._get_count(renderer, 'videoCountText')
            if self._get_count(renderer, 'subscriberCountText') is not None else None),
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'channel_is_verified': True if self._has_badge(
            self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
    }
4688
8bdd16b4 4689 def _grid_entries(self, grid_renderer):
4690 for item in grid_renderer['items']:
4691 if not isinstance(item, dict):
39b62db1 4692 continue
cd7c66cf 4693 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4694 if not isinstance(renderer, dict):
4695 continue
052e1350 4696 title = self._get_text(renderer, 'title')
fe93e2c4 4697
8bdd16b4 4698 # playlist
4699 playlist_id = renderer.get('playlistId')
4700 if playlist_id:
4701 yield self.url_result(
4702 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4703 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4704 video_title=title)
201c1459 4705 continue
8bdd16b4 4706 # video
4707 video_id = renderer.get('videoId')
4708 if video_id:
4709 yield self._extract_video(renderer)
201c1459 4710 continue
8bdd16b4 4711 # channel
4712 channel_id = renderer.get('channelId')
4713 if channel_id:
c7335551 4714 yield self._extract_channel_renderer(renderer)
201c1459 4715 continue
4716 # generic endpoint URL support
4717 ep_url = urljoin('https://www.youtube.com/', try_get(
4718 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4719 str))
201c1459 4720 if ep_url:
4721 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4722 if ie.suitable(ep_url):
4723 yield self.url_result(
4724 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4725 break
8bdd16b4 4726
def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a responsive list item, or None.

    Tried in order: a direct video, a playlist (optionally with a starting
    video), then a browse endpoint.
    """
    if video_id := traverse_obj(renderer, ('playlistItemData', 'videoId')):
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    if playlist_id := traverse_obj(renderer, (*watch_endpoint, 'playlistId')):
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    if browse_id := traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId')):
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4747
3d3dddc9 4748 def _shelf_entries_from_content(self, shelf_renderer):
4749 content = shelf_renderer.get('content')
4750 if not isinstance(content, dict):
8bdd16b4 4751 return
cd7c66cf 4752 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4753 if renderer:
4754 # TODO: add support for nested playlists so each shelf is processed
4755 # as separate playlist
4756 # TODO: this includes only first N items
86e5f3ed 4757 yield from self._grid_entries(renderer)
3d3dddc9 4758 renderer = content.get('horizontalListRenderer')
4759 if renderer:
4760 # TODO
4761 pass
8bdd16b4 4762
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint (if any), then its inline content entries.

    With `skip_channels` set, a shelf whose URL contains '/channels?' yields nothing.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not contain a shelf URL; fall back to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4778
8bdd16b4 4779 def _playlist_entries(self, video_list_renderer):
4780 for content in video_list_renderer['contents']:
4781 if not isinstance(content, dict):
4782 continue
4783 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4784 if not isinstance(renderer, dict):
4785 continue
4786 video_id = renderer.get('videoId')
4787 if not video_id:
4788 continue
4789 yield self._extract_video(renderer)
07aeced6 4790
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (a video, else a playlist) from a rich item's content."""
    supported_renderers = ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')
    renderer = traverse_obj(
        rich_grid_renderer, ('content', supported_renderers), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif playlist_id := renderer.get('playlistId'):
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
3462ffa8 4806
8bdd16b4 4807 def _video_entry(self, video_renderer):
4808 video_id = video_renderer.get('videoId')
4809 if video_id:
4810 return self._extract_video(video_renderer)
dacb3a86 4811
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a YoutubeTabIE URL result for the tile's tap target, or None without a URL."""
    target_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', target_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4818
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a backstage post.

    Order: the attached video, the attached playlist, then any video links
    in the post text other than the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4854
8bdd16b4 4855 def _post_thread_continuation_entries(self, post_thread_continuation):
4856 contents = post_thread_continuation.get('contents')
4857 if not isinstance(contents, list):
4858 return
4859 for content in contents:
4860 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4861 if isinstance(renderer, dict):
4862 yield from self._post_thread_entries(renderer)
8bdd16b4 4863 continue
6b0b0a28 4864 renderer = content.get('videoRenderer')
4865 if isinstance(renderer, dict):
4866 yield self._video_entry(renderer)
07aeced6 4867
39ed931e 4868 r''' # unused
4869 def _rich_grid_entries(self, contents):
4870 for content in contents:
4871 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4872 if video_renderer:
4873 entry = self._video_entry(video_renderer)
4874 if entry:
4875 yield entry
4876 '''
52efa4b3 4877
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every linked text run in a report history table."""
    run_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, run_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4884
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries from ``parent_renderer['contents']``.

    `continuation_list` is an output parameter: it is reset to ``[None]`` in
    place and its single slot is updated with the extracted continuation as
    the contents are processed.
    """
    continuation_list[:] = [None]

    # Handlers for the items found inside a section renderer's contents;
    # each returns an iterable of entries
    section_item_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                handler = section_item_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first recognised renderer of an item is processed
                break

        # Fall back to the section's own continuation
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    # Finally fall back to the parent's continuation
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4938
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab: those embedded in it, then each continuation page.

    Paging stops when no continuation is left, a continuation token repeats,
    a response is empty, or a response has no recognised renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so this follows the
        # per-page rebinding done in the loop below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # renderer key -> (entry generator, key to wrap the continuation items under, if any)
    continuation_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }
    expected_response_keys = (
        'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')

    seen_continuations = set()
    page_num = 0
    while continuation:
        page_num += 1
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)

        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, ytcfg=ytcfg,
            headers=self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data),
            check_get_keys=expected_response_keys)
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            if key not in continuation_handlers:
                continue
            handler, parent_key = continuation_handlers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh slot for this page; `extract_entries` writes into it
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
9558dcec 5006
8bdd16b4 5007 @staticmethod
7c219ea6 5008 def _extract_selected_tab(tabs, fatal=True):
86973308
M
5009 for tab_renderer in tabs:
5010 if tab_renderer.get('selected'):
5011 return tab_renderer
5012 if fatal:
5013 raise ExtractorError('Unable to find selected tab')
5014
5015 @staticmethod
def _extract_tab_renderers(response):
    """Collect the dict tab renderers (regular and expandable) of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
b82f815f 5019
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab.

    The playlist title is the metadata title extended with the selected
    tab's 'title' and 'expandedText' fields when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
39ed931e 5033
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Build the playlist-level metadata dict for a channel tab or playlist response.

    @param item_id: fallback ID; replaced by the channel ID when one is found in
                    the channel metadata renderer
    @param data:    response to read metadata, header and sidebar renderers from
    @returns        dict with (at least) id, title, availability, thumbnails, tags,
                    description, view/playlist counts, modified_date and the
                    channel/uploader fields; individual values may be None
    """
    info = {'id': item_id}

    # Channel pages carry a channelMetadataRenderer; anything else falls back
    # to the playlistMetadataRenderer (see the else branch below)
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        # Prefer the explicit externalId, otherwise derive the ID from the channel URL
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    # Banners get a lower preference than the other thumbnails
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text and finally to the ID
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        # Microformat tags are preferred; otherwise the 'keywords' string is split shell-style
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        # Sidebar thumbnails win over header thumbnails; avatar and banners are always appended
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # View count: playlist stats first, then the playlist header, then the channel "about" metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel ID so far: take the channel details from the playlist owner instead.
    # NOTE(review): this also overwrites any 'channel'/'uploader_id' set above - confirm intended
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # Strip the "by ... and N others" wrapper when present, else keep the text as-is
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
        })

    # Derived fields are computed last so they reflect whichever branch set channel/uploader
    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
73c4ac2c 5156
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch page) playlist, fetching further pages
    from the 'next' endpoint until no new videos are returned.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: ID sent as 'playlistId' in each continuation request
    @param data:        initial response; used for account sync ID and visitor data
    @param ytcfg:       ytcfg passed to the header generation and API request
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Each page may repeat videos already yielded; resume right after the last
        # yielded ID (or from the beginning if it is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint (or not be a video renderer at all);
        # fall back to an empty dict so the defaults below apply instead of raising
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The continuation response carries no playlist; there is nothing more to iterate
            return
cd7c66cf 5187
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return a result for a watch-page playlist: either a redirect to the regular
    tab-based playlist URL, or an inline playlist extracted from `playlist`.

    @param item_id:  fallback playlist ID when `playlist` has no 'playlistId'
    @param url:      URL being extracted; base for resolving the playlist URL
    @param data:     response; used as a fallback source for the title
    @param playlist: playlist renderer dict
    @param ytcfg:    passed through to the inline playlist extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
c5e8d7af 5210
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a public badge, header privacy or privacy icon says so;
             otherwise whatever self._availability() derives from the flags below
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _privacy_matches(header_value, icon_value, fallback=None):
        # Header privacy takes priority over the dropdown icon; `fallback` is used
        # only when neither is available (None means "unknown")
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # The badge check is OR-ed with the header/icon result explicitly. Previously it was
    # written as `badge or X if header is not None else Y`, which Python parses as
    # `(badge or X) if header is not None else Y`, so a badge was silently ignored
    # whenever the header had no privacy value.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _privacy_matches('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _privacy_matches('UNLISTED', 'PRIVACY_UNLISTED', fallback=microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 5251
5252 @staticmethod
5253 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
5254 sidebar_renderer = try_get(
5255 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
5256 for item in sidebar_renderer:
5257 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
5258 if renderer:
5259 return renderer
5260
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without any request when `data` has neither playlist metadata
    nor a playlist header; otherwise returns the (non-fatal) API response.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=api_headers,
        query={'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'},
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5280
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTabIE 'skip' extractor argument"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
5284
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage for `url` and extract its initial data, using self.RetryManager.

    @param url:     URL of the webpage to download
    @param item_id: ID passed to the download and extraction helpers
    @param fatal:   forwarded to RetryManager, extract_yt_initial_data and _error_or_warning
    @returns        (webpage, data); webpage is None if it was never downloaded, and
                    data is None if no complete initial data was obtained
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are handed to the retry manager, except HTTP 403/429
                # which fall through and are reported immediately
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            # Alerts are reported without retrying
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            # Discard the incomplete data so it is not returned if retries run out
            data = None
            continue

    return webpage, data
5313
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Nothing to report when a ytcfg is available or no authentication is in use
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
5325
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data for `url`, from the webpage if possible and from
    the API (via _extract_tab_endpoint) otherwise.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if not fatal:
                self.report_warning(msg, only_once=True)
            else:
                raise ExtractorError(msg, expected=True)

    if data:
        return data, ytcfg

    # No usable webpage data: warn about authentication if needed, then ask the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5344
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through 'navigation/resolve_url' and fetch the response of the
    endpoint it resolves to ('browse' or 'next').

    Raises ExtractorError if nothing resolves and `fatal` is set; otherwise warns
    and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), tried in this order
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
5362
# Default 'params' value for search requests: _search_results() falls back to it
# when no explicit params are passed. A falsy value means no 'params' field is sent.
_SEARCH_PARAMS = None
5364
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, page by page, until no continuation is left.

    @param query:          search query string
    @param params:         value of the request's 'params' field; defaults to
                           self._SEARCH_PARAMS and is omitted when falsy
    @param default_client: client used for the ytcfg download and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # The distinct top-level keys of the content paths
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list that _extract_entries is given to hold the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        payload.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
5397
5398
5399class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5400 IE_DESC = 'YouTube Tabs'
5401 _VALID_URL = r'''(?x:
5402 https?://
b032ff0f 5403 (?!consent\.)(?:\w+\.)?
a6213a49 5404 (?:
5405 youtube(?:kids)?\.com|
5406 %(invidious)s
5407 )/
5408 (?:
5409 (?P<channel_type>channel|c|user|browse)/|
5410 (?P<not_channel>
5411 feed/|hashtag/|
5412 (?:playlist|watch)\?.*?\blist=
5413 )|
5414 (?!(?:%(reserved_names)s)\b) # Direct URLs
5415 )
5416 (?P<id>[^/?\#&]+)
5417 )''' % {
5418 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5419 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5420 }
5421 IE_NAME = 'youtube:tab'
5422
5423 _TESTS = [{
5424 'note': 'playlists, multipage',
5425 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5426 'playlist_mincount': 94,
5427 'info_dict': {
5428 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5429 'title': 'Igor Kleiner Ph.D. - Playlists',
5430 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5431 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5432 'uploader_id': '@IgorDataScience',
5433 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5434 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5435 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5436 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5437 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5438 'channel_follower_count': int
a6213a49 5439 },
5440 }, {
5441 'note': 'playlists, multipage, different order',
5442 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5443 'playlist_mincount': 94,
5444 'info_dict': {
5445 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5446 'title': 'Igor Kleiner Ph.D. - Playlists',
5447 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5448 'uploader': 'Igor Kleiner Ph.D.',
7666b936 5449 'uploader_id': '@IgorDataScience',
5450 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
8828f457 5451 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
976ae3ea 5452 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8828f457 5453 'channel': 'Igor Kleiner Ph.D.',
976ae3ea 5454 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5455 'channel_follower_count': int
a6213a49 5456 },
5457 }, {
5458 'note': 'playlists, series',
5459 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5460 'playlist_mincount': 5,
5461 'info_dict': {
5462 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5463 'title': '3Blue1Brown - Playlists',
8828f457 5464 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5465 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5466 'channel': '3Blue1Brown',
5467 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5468 'uploader_id': '@3blue1brown',
5469 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5470 'uploader': '3Blue1Brown',
976ae3ea 5471 'tags': ['Mathematics'],
14a14335 5472 'channel_follower_count': int,
8213ce28 5473 'channel_is_verified': True,
a6213a49 5474 },
5475 }, {
5476 'note': 'playlists, singlepage',
5477 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5478 'playlist_mincount': 4,
5479 'info_dict': {
5480 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5481 'title': 'ThirstForScience - Playlists',
5482 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5483 'uploader': 'ThirstForScience',
7666b936 5484 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5485 'uploader_id': '@ThirstForScience',
976ae3ea 5486 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
7666b936 5487 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
8828f457 5488 'tags': 'count:12',
976ae3ea 5489 'channel': 'ThirstForScience',
6c73052c 5490 'channel_follower_count': int
a6213a49 5491 }
5492 }, {
5493 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5494 'only_matching': True,
5495 }, {
5496 'note': 'basic, single video playlist',
5497 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5498 'info_dict': {
a6213a49 5499 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5500 'title': 'youtube-dl public playlist',
976ae3ea 5501 'description': '',
5502 'tags': [],
5503 'view_count': int,
5504 'modified_date': '20201130',
5505 'channel': 'Sergey M.',
5506 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
976ae3ea 5507 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5508 'availability': 'public',
7666b936 5509 'uploader': 'Sergey M.',
5510 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5511 'uploader_id': '@sergeym.6173',
a6213a49 5512 },
5513 'playlist_count': 1,
5514 }, {
5515 'note': 'empty playlist',
5516 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5517 'info_dict': {
a6213a49 5518 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5519 'title': 'youtube-dl empty playlist',
976ae3ea 5520 'tags': [],
5521 'channel': 'Sergey M.',
5522 'description': '',
8828f457 5523 'modified_date': '20230921',
976ae3ea 5524 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5525 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8828f457 5526 'availability': 'unlisted',
7666b936 5527 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5528 'uploader_id': '@sergeym.6173',
5529 'uploader': 'Sergey M.',
a6213a49 5530 },
5531 'playlist_count': 0,
5532 }, {
5533 'note': 'Home tab',
5534 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5535 'info_dict': {
5536 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5537 'title': 'lex will - Home',
5538 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5539 'uploader': 'lex will',
7666b936 5540 'uploader_id': '@lexwill718',
976ae3ea 5541 'channel': 'lex will',
5542 'tags': ['bible', 'history', 'prophesy'],
7666b936 5543 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5544 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5545 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5546 'channel_follower_count': int
a6213a49 5547 },
5548 'playlist_mincount': 2,
5549 }, {
5550 'note': 'Videos tab',
5551 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5552 'info_dict': {
5553 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5554 'title': 'lex will - Videos',
5555 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5556 'uploader': 'lex will',
7666b936 5557 'uploader_id': '@lexwill718',
976ae3ea 5558 'tags': ['bible', 'history', 'prophesy'],
5559 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5560 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5561 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5562 'channel': 'lex will',
6c73052c 5563 'channel_follower_count': int
a6213a49 5564 },
5565 'playlist_mincount': 975,
5566 }, {
5567 'note': 'Videos tab, sorted by popular',
5568 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5569 'info_dict': {
5570 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5571 'title': 'lex will - Videos',
5572 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5573 'uploader': 'lex will',
7666b936 5574 'uploader_id': '@lexwill718',
976ae3ea 5575 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5576 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5577 'channel': 'lex will',
5578 'tags': ['bible', 'history', 'prophesy'],
5579 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5580 'channel_follower_count': int
a6213a49 5581 },
5582 'playlist_mincount': 199,
5583 }, {
5584 'note': 'Playlists tab',
5585 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5586 'info_dict': {
5587 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5588 'title': 'lex will - Playlists',
5589 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5590 'uploader': 'lex will',
7666b936 5591 'uploader_id': '@lexwill718',
5592 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5593 'channel': 'lex will',
5594 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5595 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5596 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5597 'channel_follower_count': int
a6213a49 5598 },
5599 'playlist_mincount': 17,
5600 }, {
5601 'note': 'Community tab',
5602 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5603 'info_dict': {
5604 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5605 'title': 'lex will - Community',
5606 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5607 'channel': 'lex will',
5608 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5609 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5610 'tags': ['bible', 'history', 'prophesy'],
7666b936 5611 'channel_follower_count': int,
5612 'uploader_url': 'https://www.youtube.com/@lexwill718',
5613 'uploader_id': '@lexwill718',
5614 'uploader': 'lex will',
a6213a49 5615 },
5616 'playlist_mincount': 18,
5617 }, {
5618 'note': 'Channels tab',
5619 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5620 'info_dict': {
5621 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5622 'title': 'lex will - Channels',
5623 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5624 'channel': 'lex will',
5625 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5626 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5627 'tags': ['bible', 'history', 'prophesy'],
7666b936 5628 'channel_follower_count': int,
5629 'uploader_url': 'https://www.youtube.com/@lexwill718',
5630 'uploader_id': '@lexwill718',
5631 'uploader': 'lex will',
a6213a49 5632 },
5633 'playlist_mincount': 12,
5634 }, {
5635 'note': 'Search tab',
5636 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5637 'playlist_mincount': 40,
5638 'info_dict': {
5639 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5640 'title': '3Blue1Brown - Search - linear algebra',
8828f457 5641 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
976ae3ea 5642 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5643 'tags': ['Mathematics'],
5644 'channel': '3Blue1Brown',
5645 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5646 'channel_follower_count': int,
5647 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5648 'uploader_id': '@3blue1brown',
5649 'uploader': '3Blue1Brown',
8213ce28 5650 'channel_is_verified': True,
a6213a49 5651 },
5652 }, {
5653 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5654 'only_matching': True,
5655 }, {
5656 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5657 'only_matching': True,
5658 }, {
5659 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5660 'only_matching': True,
5661 }, {
5662 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5663 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5664 'info_dict': {
5665 'title': '29C3: Not my department',
5666 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
a6213a49 5667 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5668 'tags': [],
976ae3ea 5669 'view_count': int,
5670 'modified_date': '20150605',
5671 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
7666b936 5672 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
976ae3ea 5673 'channel': 'Christiaan008',
c26f9b99 5674 'availability': 'public',
7666b936 5675 'uploader_id': '@ChRiStIaAn008',
5676 'uploader': 'Christiaan008',
5677 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
a6213a49 5678 },
5679 'playlist_count': 96,
5680 }, {
5681 'note': 'Large playlist',
5682 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5683 'info_dict': {
5684 'title': 'Uploads from Cauchemar',
5685 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
7666b936 5686 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5687 'tags': [],
5688 'modified_date': r're:\d{8}',
5689 'channel': 'Cauchemar',
976ae3ea 5690 'view_count': int,
5691 'description': '',
5692 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5693 'availability': 'public',
7666b936 5694 'uploader_id': '@Cauchemar89',
5695 'uploader': 'Cauchemar',
5696 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
a6213a49 5697 },
5698 'playlist_mincount': 1123,
976ae3ea 5699 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5700 }, {
5701 'note': 'even larger playlist, 8832 videos',
5702 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5703 'only_matching': True,
5704 }, {
5705 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5706 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5707 'info_dict': {
5708 'title': 'Uploads from Interstellar Movie',
5709 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5710 'tags': [],
5711 'view_count': int,
5712 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
7666b936 5713 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5714 'channel': 'Interstellar Movie',
5715 'description': '',
5716 'modified_date': r're:\d{8}',
c26f9b99 5717 'availability': 'public',
7666b936 5718 'uploader_id': '@InterstellarMovie',
5719 'uploader': 'Interstellar Movie',
5720 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
a6213a49 5721 },
5722 'playlist_mincount': 21,
5723 }, {
5724 'note': 'Playlist with "show unavailable videos" button',
5725 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5726 'info_dict': {
5727 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5728 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5729 'view_count': int,
5730 'channel': 'Phim Siêu Nhân Nhật Bản',
5731 'tags': [],
976ae3ea 5732 'description': '',
5733 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5734 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5735 'modified_date': r're:\d{8}',
c26f9b99 5736 'availability': 'public',
7666b936 5737 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5738 'uploader_id': '@phimsieunhannhatban',
5739 'uploader': 'Phim Siêu Nhân Nhật Bản',
a6213a49 5740 },
5741 'playlist_mincount': 200,
976ae3ea 5742 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5743 }, {
5744 'note': 'Playlist with unavailable videos in page 7',
5745 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5746 'info_dict': {
5747 'title': 'Uploads from BlankTV',
5748 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5749 'channel': 'BlankTV',
7666b936 5750 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5751 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5752 'view_count': int,
5753 'tags': [],
976ae3ea 5754 'modified_date': r're:\d{8}',
5755 'description': '',
c26f9b99 5756 'availability': 'public',
7666b936 5757 'uploader_id': '@blanktv',
5758 'uploader': 'BlankTV',
5759 'uploader_url': 'https://www.youtube.com/@blanktv',
a6213a49 5760 },
5761 'playlist_mincount': 1000,
976ae3ea 5762 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5763 }, {
5764 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5765 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5766 'info_dict': {
5767 'title': 'Data Analysis with Dr Mike Pound',
5768 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
a6213a49 5769 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5770 'tags': [],
5771 'view_count': int,
5772 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
7666b936 5773 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
976ae3ea 5774 'channel': 'Computerphile',
c26f9b99 5775 'availability': 'public',
6141346d 5776 'modified_date': '20190712',
7666b936 5777 'uploader_id': '@Computerphile',
5778 'uploader': 'Computerphile',
5779 'uploader_url': 'https://www.youtube.com/@Computerphile',
a6213a49 5780 },
5781 'playlist_mincount': 11,
5782 }, {
5783 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5784 'only_matching': True,
5785 }, {
5786 'note': 'Playlist URL that does not actually serve a playlist',
5787 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5788 'info_dict': {
5789 'id': 'FqZTN594JQw',
5790 'ext': 'webm',
5791 'title': "Smiley's People 01 detective, Adventure Series, Action",
a6213a49 5792 'upload_date': '20150526',
5793 'license': 'Standard YouTube License',
5794 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5795 'categories': ['People & Blogs'],
5796 'tags': list,
5797 'view_count': int,
5798 'like_count': int,
a6213a49 5799 },
5800 'params': {
5801 'skip_download': True,
5802 },
5803 'skip': 'This video is not available.',
5804 'add_ie': [YoutubeIE.ie_key()],
5805 }, {
5806 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5807 'only_matching': True,
5808 }, {
5809 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5810 'only_matching': True,
5811 }, {
5812 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5813 'info_dict': {
14a14335 5814 'id': 'hGkQjiJLjWQ', # This will keep changing
a6213a49 5815 'ext': 'mp4',
976ae3ea 5816 'title': str,
a6213a49 5817 'upload_date': r're:\d{8}',
976ae3ea 5818 'description': str,
a6213a49 5819 'categories': ['News & Politics'],
5820 'tags': list,
5821 'like_count': int,
86973308 5822 'release_timestamp': int,
976ae3ea 5823 'channel': 'Sky News',
5824 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5825 'age_limit': 0,
5826 'view_count': int,
86973308 5827 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5828 'playable_in_embed': True,
86973308 5829 'release_date': r're:\d+',
976ae3ea 5830 'availability': 'public',
5831 'live_status': 'is_live',
5832 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5833 'channel_follower_count': int,
5834 'concurrent_view_count': int,
7666b936 5835 'uploader_url': 'https://www.youtube.com/@SkyNews',
5836 'uploader_id': '@SkyNews',
5837 'uploader': 'Sky News',
8213ce28 5838 'channel_is_verified': True,
a6213a49 5839 },
5840 'params': {
5841 'skip_download': True,
5842 },
976ae3ea 5843 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5844 }, {
5845 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5846 'info_dict': {
5847 'id': 'a48o2S1cPoo',
5848 'ext': 'mp4',
5849 'title': 'The Young Turks - Live Main Show',
a6213a49 5850 'upload_date': '20150715',
5851 'license': 'Standard YouTube License',
5852 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5853 'categories': ['News & Politics'],
5854 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5855 'like_count': int,
a6213a49 5856 },
5857 'params': {
5858 'skip_download': True,
5859 },
5860 'only_matching': True,
5861 }, {
5862 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5863 'only_matching': True,
5864 }, {
5865 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5866 'only_matching': True,
5867 }, {
5868 'note': 'A channel that is not live. Should raise error',
5869 'url': 'https://www.youtube.com/user/numberphile/live',
5870 'only_matching': True,
5871 }, {
5872 'url': 'https://www.youtube.com/feed/trending',
5873 'only_matching': True,
5874 }, {
5875 'url': 'https://www.youtube.com/feed/library',
5876 'only_matching': True,
5877 }, {
5878 'url': 'https://www.youtube.com/feed/history',
5879 'only_matching': True,
5880 }, {
5881 'url': 'https://www.youtube.com/feed/subscriptions',
5882 'only_matching': True,
5883 }, {
5884 'url': 'https://www.youtube.com/feed/watch_later',
5885 'only_matching': True,
5886 }, {
5887 'note': 'Recommended - redirects to home page.',
5888 'url': 'https://www.youtube.com/feed/recommended',
5889 'only_matching': True,
5890 }, {
5891 'note': 'inline playlist with not always working continuations',
5892 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5893 'only_matching': True,
5894 }, {
5895 'url': 'https://www.youtube.com/course',
5896 'only_matching': True,
5897 }, {
5898 'url': 'https://www.youtube.com/zsecurity',
5899 'only_matching': True,
5900 }, {
5901 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5902 'only_matching': True,
5903 }, {
5904 'url': 'https://www.youtube.com/TheYoungTurks/live',
5905 'only_matching': True,
5906 }, {
5907 'url': 'https://www.youtube.com/hashtag/cctv9',
5908 'info_dict': {
5909 'id': 'cctv9',
8828f457 5910 'title': 'cctv9 - All',
976ae3ea 5911 'tags': [],
a6213a49 5912 },
4dc23a80 5913 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5914 }, {
5915 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5916 'only_matching': True,
5917 }, {
5918 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5919 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5920 'only_matching': True
5921 }, {
5922 'note': '/browse/ should redirect to /channel/',
5923 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5924 'only_matching': True
5925 }, {
5926 'note': 'VLPL, should redirect to playlist?list=PL...',
5927 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5928 'info_dict': {
5929 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
a6213a49 5930 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
12a1b225 5931 'title': 'NCS : All Releases 💿',
7666b936 5932 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
976ae3ea 5933 'modified_date': r're:\d{8}',
5934 'view_count': int,
5935 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5936 'tags': [],
5937 'channel': 'NoCopyrightSounds',
c26f9b99 5938 'availability': 'public',
7666b936 5939 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
5940 'uploader': 'NoCopyrightSounds',
5941 'uploader_id': '@NoCopyrightSounds',
a6213a49 5942 },
5943 'playlist_mincount': 166,
7666b936 5944 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
a6213a49 5945 }, {
7666b936 5946 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
a6213a49 5947 'note': 'Topic, should redirect to playlist?list=UU...',
5948 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5949 'info_dict': {
5950 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5951 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 5952 'tags': [],
5953 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5954 'channel': 'Royalty Free Music - Topic',
5955 'view_count': int,
5956 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
976ae3ea 5957 'modified_date': r're:\d{8}',
976ae3ea 5958 'description': '',
c26f9b99 5959 'availability': 'public',
7666b936 5960 'uploader': 'Royalty Free Music - Topic',
a6213a49 5961 },
a6213a49 5962 'playlist_mincount': 101,
7666b936 5963 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5964 }, {
86973308
M
5965 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5966 # Treat as a general feed
a6213a49 5967 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5968 'info_dict': {
5969 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5970 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5971 'tags': [],
a6213a49 5972 },
a6213a49 5973 'playlist_mincount': 9,
5974 }, {
5975 'note': 'Youtube music Album',
5976 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5977 'info_dict': {
5978 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5979 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5980 'tags': [],
5981 'view_count': int,
5982 'description': '',
5983 'availability': 'unlisted',
5984 'modified_date': r're:\d{8}',
a6213a49 5985 },
5986 'playlist_count': 50,
7666b936 5987 'expected_warnings': ['YouTube Music is not directly supported'],
a6213a49 5988 }, {
5989 'note': 'unlisted single video playlist',
5990 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5991 'info_dict': {
a6213a49 5992 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5993 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5994 'availability': 'unlisted',
5995 'tags': [],
12a1b225 5996 'modified_date': '20220418',
976ae3ea 5997 'channel': 'colethedj',
5998 'view_count': int,
5999 'description': '',
976ae3ea 6000 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6001 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
7666b936 6002 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6003 'uploader_id': '@colethedj1894',
6004 'uploader': 'colethedj',
a6213a49 6005 },
93e12ed7 6006 'playlist': [{
6007 'info_dict': {
6008 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6009 'id': 'BaW_jenozKc',
6010 '_type': 'url',
6011 'ie_key': 'Youtube',
6012 'duration': 10,
6013 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6014 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6015 'view_count': int,
6016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6017 'channel': 'Philipp Hagemeister',
6018 'uploader_id': '@PhilippHagemeister',
6019 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6020 'uploader': 'Philipp Hagemeister',
6021 }
6022 }],
a6213a49 6023 'playlist_count': 1,
93e12ed7 6024 'params': {'extract_flat': True},
a6213a49 6025 }, {
6026 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6027 'url': 'https://www.youtube.com/feed/recommended',
6028 'info_dict': {
6029 'id': 'recommended',
6030 'title': 'recommended',
6c73052c 6031 'tags': [],
a6213a49 6032 },
6033 'playlist_mincount': 50,
6034 'params': {
6035 'skip_download': True,
6036 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6037 },
6038 }, {
6039 'note': 'API Fallback: /videos tab, sorted by oldest first',
6040 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6041 'info_dict': {
6042 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6043 'title': 'Cody\'sLab - Videos',
6044 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
976ae3ea 6045 'channel': 'Cody\'sLab',
6046 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6047 'tags': [],
6048 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 6049 'channel_follower_count': int
a6213a49 6050 },
6051 'playlist_mincount': 650,
6052 'params': {
6053 'skip_download': True,
6054 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6055 },
86973308 6056 'skip': 'Query for sorting no longer works',
a6213a49 6057 }, {
6058 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6059 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6060 'info_dict': {
6061 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6062 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6063 'modified_date': r're:\d{8}',
6064 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6065 'description': '',
6066 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6067 'tags': [],
6068 'channel': 'Royalty Free Music - Topic',
6069 'view_count': int,
c26f9b99 6070 'availability': 'public',
7666b936 6071 'uploader': 'Royalty Free Music - Topic',
a6213a49 6072 },
a6213a49 6073 'playlist_mincount': 101,
6074 'params': {
6075 'skip_download': True,
6076 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6077 },
7666b936 6078 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
7c219ea6 6079 }, {
6080 'note': 'non-standard redirect to regional channel',
6081 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6082 'only_matching': True
61d3665d 6083 }, {
6084 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6085 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6086 'info_dict': {
6087 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6088 'modified_date': '20220407',
6089 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6090 'tags': [],
61d3665d 6091 'availability': 'unlisted',
6092 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6093 'channel': 'pukkandan',
6094 'description': 'Test for collaborative playlist',
6095 'title': 'yt-dlp test - collaborative playlist',
12a1b225 6096 'view_count': int,
7666b936 6097 'uploader_url': 'https://www.youtube.com/@pukkandan',
6098 'uploader_id': '@pukkandan',
6099 'uploader': 'pukkandan',
61d3665d 6100 },
6101 'playlist_mincount': 2
c26f9b99 6102 }, {
6103 'note': 'translated tab name',
6104 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6105 'info_dict': {
6106 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6107 'tags': [],
c26f9b99 6108 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 6109 'description': 'test description',
c26f9b99 6110 'title': 'cole-dlp-test-acc - 再生リスト',
c26f9b99 6111 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6112 'channel': 'cole-dlp-test-acc',
7666b936 6113 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6114 'uploader_id': '@coletdjnz',
6115 'uploader': 'cole-dlp-test-acc',
c26f9b99 6116 },
6117 'playlist_mincount': 1,
6118 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6119 'expected_warnings': ['Preferring "ja"'],
6120 }, {
6121 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6122 'note': 'preferred lang set with playlist with translated video titles',
6123 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6124 'info_dict': {
6125 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6126 'tags': [],
6127 'view_count': int,
6128 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
c26f9b99 6129 'channel': 'cole-dlp-test-acc',
6130 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6131 'description': 'test',
c26f9b99 6132 'title': 'dlp test playlist',
6133 'availability': 'public',
7666b936 6134 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6135 'uploader_id': '@coletdjnz',
6136 'uploader': 'cole-dlp-test-acc',
c26f9b99 6137 },
6138 'playlist_mincount': 1,
6139 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6140 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 6141 }, {
6142 # shorts audio pivot for 2GtVksBMYFM.
6143 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6144 'info_dict': {
6145 'id': 'sfv_audio_pivot',
6146 'title': 'sfv_audio_pivot',
6147 'tags': [],
6148 },
6149 'playlist_mincount': 50,
6150
86973308
M
6151 }, {
6152 # Channel with a real live tab (not to be mistaken with streams tab)
6153 # Do not treat like it should redirect to live stream
6154 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6155 'info_dict': {
6156 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6157 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6158 'tags': [],
6159 },
6160 'playlist_mincount': 20,
6161 }, {
6162 # Tab name is not the same as tab id
6163 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6164 'info_dict': {
6165 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6166 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6167 'tags': [],
6168 },
6169 'playlist_mincount': 8,
6170 }, {
6171 # Home tab id is literally home. Not to get mistaken with featured
6172 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6173 'info_dict': {
6174 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6175 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6176 'tags': [],
6177 },
6178 'playlist_mincount': 8,
6179 }, {
6180 # Should get three playlists for videos, shorts and streams tabs
6181 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6182 'info_dict': {
6183 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 6184 'title': 'Polka Ch. 尾丸ポルカ',
6185 'channel_follower_count': int,
6186 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6187 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
8828f457 6188 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
bd7e919a 6189 'channel': 'Polka Ch. 尾丸ポルカ',
6190 'tags': 'count:35',
7666b936 6191 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6192 'uploader': 'Polka Ch. 尾丸ポルカ',
6193 'uploader_id': '@OmaruPolka',
8828f457 6194 'channel_is_verified': True,
86973308
M
6195 },
6196 'playlist_count': 3,
6197 }, {
6198 # Shorts tab with channel with handle
7666b936 6199 # TODO: fix channel description
86973308
M
6200 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6201 'info_dict': {
6202 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6203 'title': 'Not Just Bikes - Shorts',
8828f457 6204 'tags': 'count:10',
86973308 6205 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
8828f457 6206 'description': 'md5:5e82545b3a041345927a92d0585df247',
86973308 6207 'channel_follower_count': int,
86973308 6208 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
86973308 6209 'channel': 'Not Just Bikes',
7666b936 6210 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6211 'uploader': 'Not Just Bikes',
6212 'uploader_id': '@NotJustBikes',
8828f457 6213 'channel_is_verified': True,
86973308
M
6214 },
6215 'playlist_mincount': 10,
6216 }, {
6217 # Streams tab
6218 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6219 'info_dict': {
6220 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6221 'title': '中村悠一 - Live',
6222 'tags': 'count:7',
6223 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6224 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
86973308 6225 'channel': '中村悠一',
86973308 6226 'channel_follower_count': int,
86973308 6227 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
7666b936 6228 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6229 'uploader_id': '@Yuichi-Nakamura',
6230 'uploader': '中村悠一',
86973308
M
6231 },
6232 'playlist_mincount': 60,
6233 }, {
6234 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6235 # See test_youtube_lists
6236 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6237 'only_matching': True,
6238 }, {
6239 # No uploads and no UCID given. Should fail with no uploads error
6240 # See test_youtube_lists
6241 'url': 'https://www.youtube.com/news',
6242 'only_matching': True
6243 }, {
6244 # No videos tab but has a shorts tab
6245 'url': 'https://www.youtube.com/c/TKFShorts',
6246 'info_dict': {
6247 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6248 'title': 'Shorts Break - Shorts',
7666b936 6249 'tags': 'count:48',
86973308
M
6250 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6251 'channel': 'Shorts Break',
7666b936 6252 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
86973308 6253 'channel_follower_count': int,
86973308 6254 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
7666b936 6255 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6256 'uploader': 'Shorts Break',
6257 'uploader_id': '@ShortsBreak_Official',
86973308
M
6258 },
6259 'playlist_mincount': 30,
6260 }, {
6261 # Trending Now Tab. tab id is empty
6262 'url': 'https://www.youtube.com/feed/trending',
6263 'info_dict': {
6264 'id': 'trending',
6265 'title': 'trending - Now',
6266 'tags': [],
6267 },
6268 'playlist_mincount': 30,
6269 }, {
6270 # Trending Gaming Tab. tab id is empty
6271 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6272 'info_dict': {
6273 'id': 'trending',
6274 'title': 'trending - Gaming',
6275 'tags': [],
6276 },
6277 'playlist_mincount': 30,
4dc23a80
M
6278 }, {
6279 # Shorts url result in shorts tab
7666b936 6280 # TODO: Fix channel id extraction
4dc23a80
M
6281 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6282 'info_dict': {
6283 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6284 'title': 'cole-dlp-test-acc - Shorts',
4dc23a80 6285 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6286 'description': 'test description',
6287 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6288 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6289 'tags': [],
7666b936 6290 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6291 'uploader_id': '@coletdjnz',
4dc23a80 6292 'uploader': 'cole-dlp-test-acc',
4dc23a80
M
6293 },
6294 'playlist': [{
6295 'info_dict': {
7666b936 6296 # Channel data is not currently available for short renderers (as of 2023-03-01)
4dc23a80
M
6297 '_type': 'url',
6298 'ie_key': 'Youtube',
6299 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6300 'id': 'sSM9J5YH_60',
4dc23a80 6301 'title': 'SHORT short',
4dc23a80
M
6302 'view_count': int,
6303 'thumbnails': list,
6304 }
6305 }],
6306 'params': {'extract_flat': True},
6307 }, {
6308 # Live video status should be extracted
6309 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6310 'info_dict': {
6311 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6312 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6313 'tags': []
6314 },
6315 'playlist': [{
6316 'info_dict': {
6317 '_type': 'url',
6318 'ie_key': 'Youtube',
6319 'url': 'startswith:https://www.youtube.com/watch?v=',
6320 'id': str,
6321 'title': str,
6322 'live_status': 'is_live',
6323 'channel_id': str,
6324 'channel_url': str,
6325 'concurrent_view_count': int,
6326 'channel': str,
93e12ed7 6327 'uploader': str,
6328 'uploader_url': str,
14a14335 6329 'uploader_id': str,
8213ce28 6330 'channel_is_verified': bool, # this will keep changing
4dc23a80
M
6331 }
6332 }],
c7335551 6333 'params': {'extract_flat': True, 'playlist_items': '1'},
4dc23a80 6334 'playlist_mincount': 1
c7335551
M
6335 }, {
6336 # Channel renderer metadata. Contains number of videos on the channel
6337 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6338 'info_dict': {
6339 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6340 'title': 'cole-dlp-test-acc - Channels',
c7335551
M
6341 'channel': 'cole-dlp-test-acc',
6342 'description': 'test description',
6343 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6344 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6345 'tags': [],
7666b936 6346 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6347 'uploader_id': '@coletdjnz',
c7335551 6348 'uploader': 'cole-dlp-test-acc',
c7335551
M
6349 },
6350 'playlist': [{
6351 'info_dict': {
6352 '_type': 'url',
6353 'ie_key': 'YoutubeTab',
6354 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6355 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6356 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6357 'title': 'PewDiePie',
6358 'channel': 'PewDiePie',
6359 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6360 'thumbnails': list,
6361 'channel_follower_count': int,
7666b936 6362 'playlist_count': int,
6363 'uploader': 'PewDiePie',
6364 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6365 'uploader_id': '@PewDiePie',
8213ce28 6366 'channel_is_verified': True,
c7335551
M
6367 }
6368 }],
6369 'params': {'extract_flat': True},
31e18355 6370 }, {
6371 'url': 'https://www.youtube.com/@3blue1brown/about',
6372 'info_dict': {
8828f457 6373 'id': '@3blue1brown',
31e18355 6374 'tags': ['Mathematics'],
8828f457 6375 'title': '3Blue1Brown',
31e18355 6376 'channel_follower_count': int,
6377 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
31e18355 6378 'channel': '3Blue1Brown',
31e18355 6379 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
8828f457 6380 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
7666b936 6381 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6382 'uploader_id': '@3blue1brown',
6383 'uploader': '3Blue1Brown',
8213ce28 6384 'channel_is_verified': True,
31e18355 6385 },
6386 'playlist_count': 0,
447afb9e 6387 }, {
6388 # Podcasts tab, with rich entry playlistRenderers
6389 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6390 'info_dict': {
6391 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6392 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6393 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6394 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6395 'title': '99 Percent Invisible - Podcasts',
6396 'uploader': '99 Percent Invisible',
6397 'channel_follower_count': int,
6398 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6399 'tags': [],
6400 'channel': '99 Percent Invisible',
6401 'uploader_id': '@99percentinvisiblepodcast',
6402 },
8828f457 6403 'playlist_count': 0,
447afb9e 6404 }, {
6405 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6406 'url': 'https://www.youtube.com/@AHimitsu/releases',
6407 'info_dict': {
6408 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6409 'channel': 'A Himitsu',
6410 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6411 'title': 'A Himitsu - Releases',
6412 'uploader_id': '@AHimitsu',
6413 'uploader': 'A Himitsu',
6414 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
8828f457 6415 'tags': 'count:12',
447afb9e 6416 'description': 'I make music',
6417 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6418 'channel_follower_count': int,
8213ce28 6419 'channel_is_verified': True,
447afb9e 6420 },
6421 'playlist_mincount': 10,
fcbc9ed7 6422 }, {
6423 # Playlist with only shorts, shown as reel renderers
6424 # FIXME: future: YouTube currently doesn't give continuation for this,
6425 # may do in future.
6426 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6427 'info_dict': {
6428 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6429 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6430 'view_count': int,
6431 'uploader_id': '@BangyShorts',
6432 'description': '',
6433 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6434 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6435 'channel': 'Bangy Shorts',
6436 'uploader': 'Bangy Shorts',
6437 'tags': [],
6438 'availability': 'public',
8828f457 6439 'modified_date': r're:\d{8}',
fcbc9ed7 6440 'title': 'Uploads from Bangy Shorts',
6441 },
6442 'playlist_mincount': 100,
6443 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
8828f457 6444 }, {
6445 'note': 'Tags containing spaces',
6446 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6447 'playlist_count': 3,
6448 'info_dict': {
6449 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6450 'channel': 'Markiplier',
6451 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6452 'title': 'Markiplier',
6453 'channel_follower_count': int,
6454 'description': 'md5:0c010910558658824402809750dc5d97',
6455 'uploader_id': '@markiplier',
6456 'uploader_url': 'https://www.youtube.com/@markiplier',
6457 'uploader': 'Markiplier',
6458 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6459 'channel_is_verified': True,
6460 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6461 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6462 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6463 'mark fischbach'],
6464 },
a6213a49 6465 }]
6466
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always rejected here so the two
    extractors never both claim the same URL; every other URL is judged
    by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6470
86973308
M
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<segment>" directly after it; the conditional
#          (?(not_channel)|...) only attempts this when the `not_channel`
#          group (not defined in this literal, so it has to come from
#          _VALID_URL) did not take part in the match
#   post - whatever is left of the URL
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$'
)
6472
6473 def _get_url_mobj(self, url):
6474 mobj = self._URL_RE.match(url).groupdict()
6475 mobj.update((k, '') for k, v in mobj.items() if v is None)
6476 return mobj
6477
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` tuple for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of *tab* (``''`` when it is
    missing or empty).  ``tab_id`` is taken, in order of preference, from:

    1. the ``tab`` path segment of the tab's endpoint URL (resolved
       against *base_url*),
    2. the ``tabIdentifier`` string of *tab*,
    3. the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()

    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # The `tab` group holds "/<segment>" (or ''), so drop the leading slash
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')

    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6499
6500 def _has_tab(self, tabs, tab_id):
6501 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 6502
044886c2 6503 def _empty_playlist(self, item_id, data):
6504 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
6505
    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """
        Extract a tab-style YouTube page (channel tab, playlist, or watch URL carrying a playlist).

        The URL host is first rewritten to www.youtube.com. Depending on the URL and
        the downloaded page data the method returns one of:
          - a `url_result` that hands off to another URL (music playlists/albums,
            `watch?list=...` without a video ID, a single video, a conditional redirect),
          - the result extracted from the page's tabs, or a `playlist_result` that
            groups several channel tabs (videos/streams/shorts),
          - the result of extracting an inline playlist from a watch page.

        @param url            URL matched by this extractor
        @param smuggled_data  Mapping passed in by the decorator; only the
                              'is_music_url' key is read here
        @raises ExtractorError  If a music album cannot be resolved, a watch URL has
                                neither video nor playlist ID, the requested tab does
                                not exist, or the page cannot be recognized
        @raises UserNotLive     If the /live tab was requested but another tab was selected
        """
        item_id = self._match_id(url)
        url = urllib.parse.urlunparse(
            urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        mobj = self._get_url_mobj(url)
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
        if is_channel and smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                return self.url_result(
                    f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, YoutubeTabIE)
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                return self.url_result(
                    f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

        # `tab` includes its leading slash; strip it to get the requested tab ID
        original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # No tab requested for a channel: start from its /videos tab
            url = f'{pre}/videos{post}'
        if smuggled_data.get('is_music_url'):
            self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('A video URL was given without video ID', expected=True)
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            return self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

        if not self._yes_playlist(playlist_id, video_id):
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

        data, ytcfg = self._extract_data(url, display_id)

        # YouTube may provide a non-standard redirect to the regional channel
        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
        # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
        redirect_url = traverse_obj(
            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
            redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
            self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
            return self.url_result(redirect_url, YoutubeTabIE)

        # `extra_tabs` collects URLs of additional channel tabs to extract alongside the selected one
        tabs, extra_tabs = self._extract_tab_renderers(data), []
        if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
            self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

            # /about is no longer a tab
            if original_tab_id == 'about':
                return self._empty_playlist(item_id, data)

            if not original_tab_id and selected_tab_name:
                self.to_screen('Downloading all uploads of the channel. '
                               'To download only the videos in a specific tab, pass the tab\'s URL')
                if self._has_tab(tabs, 'streams'):
                    extra_tabs.append(''.join((pre, '/streams', post)))
                if self._has_tab(tabs, 'shorts'):
                    extra_tabs.append(''.join((pre, '/shorts', post)))
                # XXX: Members-only tab should also be extracted

                if not extra_tabs and selected_tab_id != 'videos':
                    # Channel does not have streams, shorts or videos tabs
                    if item_id[:2] != 'UC':
                        return self._empty_playlist(item_id, data)

                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        # `data` still holds the channel page here, since the assignment above did not complete
                        return self._empty_playlist(item_id, data)
                    else:
                        item_id, url = pl_id, pl_url
                        self.to_screen(
                            f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

                elif extra_tabs and selected_tab_id != 'videos':
                    # When there are shorts/live tabs but not videos tab
                    url, data = f'{pre}{post}', None

            elif (original_tab_id or 'videos') != selected_tab_id:
                if original_tab_id == 'live':
                    # Live tab should have redirected to the video
                    # Except in the case the channel has an actual live tab
                    # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                    raise UserNotLive(video_id=item_id)
                elif selected_tab_name:
                    raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

                # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
                url = f'{pre}{post}'

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)

        # Re-read the tab renderers: `data` may have been replaced or cleared above
        tabs, entries = self._extract_tab_renderers(data), []
        if tabs:
            entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
            entries[0].update({
                'extractor_key': YoutubeTabIE.ie_key(),
                'extractor': YoutubeTabIE.IE_NAME,
                'webpage_url': url,
            })
        if self.get_param('playlist_items') == '0':
            entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
        else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
            # Calls this method again for every extra tab URL
            entries.extend(map(self._real_extract, extra_tabs))

        if len(entries) == 1:
            return entries[0]
        elif entries:
            metadata = self._extract_metadata_from_tabs(item_id, data)
            uploads_url = 'the Uploads (UU) playlist URL'
            if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
                uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
            self.to_screen(
                'Downloading as multiple playlists, separated by tabs. '
                f'To download as a single playlist instead, pass {uploads_url}')
            return self.playlist_result(entries, item_id, **metadata)

        # Inline playlist
        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Last resort: fall back to a single video, preferring the ID found in the page data
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if tab != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

        raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6661
c5e8d7af 6662
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist IDs and `list=` URLs, and forwards them to YoutubeTabIE."""
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v` video ID; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the import is kept function-local on purpose - presumably so this
        # method stays self-contained if its source is reused elsewhere; confirm before hoisting
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Keep the original query string when there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if YoutubeBaseInfoExtractor.is_music_url(url):
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6775
6776
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a `list=` playlist ID."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) handled by YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The playlist ID (not the video ID) is what gets reported as the result's ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6826
6827
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `embed/live_stream?channel=<id>` URLs to the channel's /live URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6841
6842
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the `ytuser:<name>` shorthand to the matching /user/ URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6855
b05654f0 6856
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the `:ytfav` keyword family to the `LL` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no further information."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
6874
6875
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the notification menu as a playlist of URL results."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter: slot 0 is
        reset to None and then set to the last continuation renderer seen, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Turn a single notification renderer into a `url` entry.

        Video notifications point at the watch URL; otherwise a community post URL is
        built from the channel and post IDs. Returns None if neither can be determined.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every page of the notification menu, following continuations."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist whose ID and title are both 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
6966
6967
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """
    Search extractor using the `ytsearch` search key (e.g. `ytsearch5:<query>`, as in the tests).

    Only class attributes are defined here; the search logic itself comes from the base classes.
    """
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Opaque search-filter value; YoutubeSearchURLIE passes the URL's `sp` value in the same role
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        }
    }]
b05654f0 6989
a61fd4cf 6990
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """
    Variant of the YouTube search extractor using the `ytsearchdate` search key.

    Differs from YoutubeSearchIE only in its name, key, description and search parameters.
    """
    # Derived from the plain search extractor's name, i.e. 'youtube:search:date'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 7004
c9ae7b95 7005
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com `/results` and `/search` URLs, honouring the `sp` query parameter."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by `search_query`/`q`; the query doubles as playlist ID and title."""
        url_query = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        terms = url_query.get('search_query') or url_query.get('q')
        query = terms[0]
        search_params = url_query.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 7075
7076
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles music.youtube.com search URLs; a section is chosen via `sp` or the URL fragment."""
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lowercase) -> `sp` search parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Run a YouTube Music search and return it as a playlist.

        The playlist ID and title are the query, suffixed with ' - <section>' when a
        section was determined from the `sp` parameter or the URL fragment.
        """
        url_query = parse_qs(url)
        query = (url_query.get('search_query') or url_query.get('q'))[0]
        params = url_query.get('sp', (None,))[0]

        if params:
            # Reverse lookup of the section name; an unknown `sp` value is used as the name itself
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: treat the text after the first '#' as the section name
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 7128
7129
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override _FEED_NAME; it determines both
    the extractor name and the /feed/ URL that is delegated to.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Reuse the login check of the YouTube base extractor, which this class does not inherit from
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL; the matched URL/keyword is not used."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
7148
7149
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the `:ytwatchlater` keyword to the `WL` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 7162
7163
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """
    Feed extractor for the `recommended` feed.

    Matches the bare youtube.com home URL as well as the `:ytrec`/`:ytrecommended` keywords.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value (True)
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 7179
1ed5b5c9 7180
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed, selected with the `:ytsubs` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 7192
1ed5b5c9 7193
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed, selected with the `:ythis`/`:ythistory` keywords."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
7202
7203
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Maps `/source/<video id>/shorts` URLs to the `sfv_audio_pivot` feed URL."""
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video ID is inserted three times into a fixed binary template,
        which is then base64-encoded and URL-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7226
7227
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs that were cut off before the video ID.

    Such URLs typically result from an unquoted `&` on the command line, so
    extraction always fails with a message explaining how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The example commands name this program (yt-dlp) so they can be copied verbatim
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
7275
7276
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolves a /clip/ URL to its base video plus the clip's start and end times."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """
        Return a `url_transparent` result for the clip's base video.

        The result carries the clip ID as `id` and the clip boundaries, converted
        from milliseconds to seconds, as `section_start`/`section_end`.

        @raises ExtractorError  If the base video ID or the clip boundaries cannot be found
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # `clip_data` is None when the page lacks the loop command; look the times up
        # safely and fail with a proper extractor error rather than a bare TypeError
        section_start = float_or_none(traverse_obj(clip_data, 'startTimeMs'), 1000)
        section_end = float_or_none(traverse_obj(clip_data, 'endTimeMs'), 1000)
        if section_start is None or section_end is None:
            raise ExtractorError('Unable to extract clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }
3cd786db 7336
7337
# Handles consent.youtube.com interstitial URLs by following the target URL
# stored in their `continue` query parameter.
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        # When `continue` is given more than once, the last occurrence wins;
        # a missing parameter is represented by the [None] fallback.
        continue_values = parse_qs(url).get('continue', [None])
        target_url = url_or_none(continue_values[-1])
        if target_url:
            return self.url_result(target_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7383
7384
772fd5cc
PH
# Matches watch URLs that end right after a `v=` value of only 1-10 ID
# characters and reports them as truncated instead of attempting extraction.
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always raises: the matched ID is too short to be extracted.
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)