]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/rozhlas] `MujRozhlas`: Add extractor (#7129)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
fe93e2c4 5import datetime
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
46383212 14import sys
f8271158 15import threading
8a784c74 16import time
e0df6211 17import traceback
14f25df2 18import urllib.error
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
4bb4a188 25from ..utils import (
f8271158 26 NO_DEFAULT,
27 ExtractorError,
4d37720a 28 LazyList,
693f0600 29 UserNotLive,
720c3099 30 bug_reports_message,
82d02080 31 classproperty,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
7a32c70d 35 filter_dict,
2d30521a 36 float_or_none,
11f9be09 37 format_field,
ff91cf74 38 get_first,
dd27fd17 39 int_or_none,
641ad5d8 40 is_html,
34921b43 41 join_nonempty,
48416bc4 42 js_to_json,
94278f72 43 mimetype2ext,
9c0d7f49 44 network_exceptions,
11f9be09 45 orderedSet,
6310acf5 46 parse_codecs,
49bd8c66 47 parse_count,
7c80519c 48 parse_duration,
7ea65411 49 parse_iso8601,
4dfbf869 50 parse_qs,
dca3ff4a 51 qualities,
3995d37d 52 remove_start,
cf7e015f 53 smuggle_url,
dbdaaa23 54 str_or_none,
c93d53f5 55 str_to_int,
f3aa3c3f 56 strftime_or_none,
7c365c21 57 traverse_obj,
556dbe7f 58 try_get,
c5e8d7af
PH
59 unescapeHTML,
60 unified_strdate,
f0d785d3 61 unified_timestamp,
cf7e015f 62 unsmuggle_url,
8bdd16b4 63 update_url_query,
21c340b8 64 url_or_none,
fe93e2c4 65 urljoin,
7c365c21 66 variadic,
c5e8d7af
PH
67)
68
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

# Static innertube client configurations, keyed by client name.
# Entries without an explicit INNERTUBE_API_KEY / INNERTUBE_HOST / REQUIRE_JS_PLAYER
# receive defaults from build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    # --- web clients ---
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # --- android clients ---
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # --- iOS clients ---
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # --- other clients ---
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}
242
243
e7870111
D
244def _split_innertube_client(client_name):
245 variant, *base = client_name.rsplit('.', 1)
246 if base:
247 return variant, base[0], variant
248 base, *variant = client_name.split('_', 1)
249 return client_name, base, variant[0] if variant else None
250
251
c795c39f
L
def short_client_name(client_name):
    """Build an upper-cased abbreviation of an innertube client name.

    The name part returned by ``_split_innertube_client`` has ``embedscreen``
    rewritten to ``e_s`` and is split on underscores. The first segment is
    truncated to four characters and combined (via ``join_nonempty``) with the
    initials of the remaining segments.
    """
    full_name = _split_innertube_client(client_name)[0]
    segments = full_name.replace('embedscreen', 'e_s').split('_')
    leading, trailing = segments[0], segments[1:]
    initials = ''.join(segment[0] for segment in trailing)
    return join_nonempty(leading[:4], initials).upper()
255
256
def build_innertube_clients():
    """Complete ``INNERTUBE_CLIENTS`` in place.

    For every client already present this:

    * fills in default ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``,
      ``REQUIRE_JS_PLAYER`` and the context client's ``hl`` when missing;
    * assigns ``priority`` as ten times the rank that ``qualities`` gives the
      client's base name, minus a variant-dependent penalty;
    * for clients without a variant, registers an extra
      ``"<client>_embedscreen"`` deep copy configured for the embed screen.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "<client>_embedscreen" entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain client: derive its embed-screen sibling from the completed config
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg


build_innertube_clients()
286
287
class BadgeType(enum.Enum):
    """Kinds of badge recognised on renderers (availability and live state)."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live-state badge
    LIVE_NOW = enum.auto()
295
296
de7f3446 297class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 298 """Provide base functions for Youtube extractors"""
e00eb564 299
3462ffa8 300 _RESERVED_NAMES = (
08e29b9f 301 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 302 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 303 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 304 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 305
3619f78d 306 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
307
52efa4b3 308 # _NETRC_MACHINE = 'youtube'
3619f78d 309
b2e8bc1b
JMF
310 # If True it will raise an error if no login info is provided
311 _LOGIN_REQUIRED = False
312
d9190e44
RH
313 _INVIDIOUS_SITES = (
314 # invidious-redirect websites
315 r'(?:www\.)?redirect\.invidious\.io',
316 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 317 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
318 r'(?:www\.)?invidious\.pussthecat\.org',
319 r'(?:www\.)?invidious\.zee\.li',
320 r'(?:www\.)?invidious\.ethibox\.fr',
05799a48
RH
321 r'(?:www\.)?iv\.ggtyler\.dev',
322 r'(?:www\.)?inv\.vern\.i2p',
323 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
324 r'(?:www\.)?inv\.riverside\.rocks',
325 r'(?:www\.)?invidious\.silur\.me',
326 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
327 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
328 r'(?:www\.)?invidious\.slipfox\.xyz',
329 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
330 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
331 r'(?:www\.)?invidious\.tiekoetter\.com',
332 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
333 r'(?:www\.)?invidious\.nerdvpn\.de',
334 r'(?:www\.)?invidious\.weblibre\.org',
335 r'(?:www\.)?inv\.odyssey346\.dev',
336 r'(?:www\.)?invidious\.dhusch\.de',
337 r'(?:www\.)?iv\.melmac\.space',
338 r'(?:www\.)?watch\.thekitty\.zone',
339 r'(?:www\.)?invidious\.privacydev\.net',
340 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
341 r'(?:www\.)?invidious\.drivet\.xyz',
342 r'(?:www\.)?vid\.priv\.au',
343 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
344 r'(?:www\.)?inv\.vern\.cc',
345 r'(?:www\.)?invidious\.esmailelbob\.xyz',
346 r'(?:www\.)?invidious\.sethforprivacy\.com',
347 r'(?:www\.)?yt\.oelrichsgarcia\.de',
348 r'(?:www\.)?yt\.artemislena\.eu',
349 r'(?:www\.)?invidious\.flokinet\.to',
350 r'(?:www\.)?invidious\.baczek\.me',
351 r'(?:www\.)?y\.com\.sb',
352 r'(?:www\.)?invidious\.epicsite\.xyz',
353 r'(?:www\.)?invidious\.lidarshield\.cloud',
354 r'(?:www\.)?yt\.funami\.tech',
d9190e44 355 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
356 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
357 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
358 # youtube-dl invidious instances list
359 r'(?:(?:www|no)\.)?invidiou\.sh',
360 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
361 r'(?:www\.)?invidious\.kabi\.tk',
362 r'(?:www\.)?invidious\.mastodon\.host',
363 r'(?:www\.)?invidious\.zapashcanon\.fr',
364 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
365 r'(?:www\.)?invidious\.tinfoil-hat\.net',
366 r'(?:www\.)?invidious\.himiko\.cloud',
367 r'(?:www\.)?invidious\.reallyancient\.tech',
368 r'(?:www\.)?invidious\.tube',
369 r'(?:www\.)?invidiou\.site',
370 r'(?:www\.)?invidious\.site',
371 r'(?:www\.)?invidious\.xyz',
372 r'(?:www\.)?invidious\.nixnet\.xyz',
373 r'(?:www\.)?invidious\.048596\.xyz',
374 r'(?:www\.)?invidious\.drycat\.fr',
375 r'(?:www\.)?inv\.skyn3t\.in',
376 r'(?:www\.)?tube\.poal\.co',
377 r'(?:www\.)?tube\.connect\.cafe',
378 r'(?:www\.)?vid\.wxzm\.sx',
379 r'(?:www\.)?vid\.mint\.lgbt',
380 r'(?:www\.)?vid\.puffyan\.us',
381 r'(?:www\.)?yewtu\.be',
382 r'(?:www\.)?yt\.elukerio\.org',
383 r'(?:www\.)?yt\.lelux\.fi',
384 r'(?:www\.)?invidious\.ggc-project\.de',
385 r'(?:www\.)?yt\.maisputain\.ovh',
386 r'(?:www\.)?ytprivate\.com',
387 r'(?:www\.)?invidious\.13ad\.de',
388 r'(?:www\.)?invidious\.toot\.koeln',
389 r'(?:www\.)?invidious\.fdn\.fr',
390 r'(?:www\.)?watch\.nettohikari\.com',
391 r'(?:www\.)?invidious\.namazso\.eu',
392 r'(?:www\.)?invidious\.silkky\.cloud',
393 r'(?:www\.)?invidious\.exonip\.de',
394 r'(?:www\.)?invidious\.riverside\.rocks',
395 r'(?:www\.)?invidious\.blamefran\.net',
396 r'(?:www\.)?invidious\.moomoo\.de',
397 r'(?:www\.)?ytb\.trom\.tf',
398 r'(?:www\.)?yt\.cyberhost\.uk',
399 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
400 r'(?:www\.)?qklhadlycap4cnod\.onion',
401 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
402 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
403 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
404 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
405 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
406 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
407 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
408 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
409 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
410 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
411 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
412 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 413 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 414 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 415 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
416 r'(?:www\.)?watch\.whatever\.social',
417 r'(?:www\.)?piped\.garudalinux\.org',
418 r'(?:www\.)?piped\.rivo\.lol',
419 r'(?:www\.)?piped-libre\.kavin\.rocks',
420 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 421 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
422 r'(?:www\.)?il\.ax',
423 r'(?:www\.)?piped\.esmailelbob\.xyz',
424 r'(?:www\.)?piped\.projectsegfau\.lt',
425 r'(?:www\.)?piped\.privacydev\.net',
426 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
427 r'(?:www\.)?piped\.smnz\.de',
428 r'(?:www\.)?piped\.adminforge\.de',
429 r'(?:www\.)?watch\.whatevertinfoil\.de',
430 r'(?:www\.)?piped\.qdi\.fi',
bc87dac7
B
431 r'(?:www\.)?piped\.video',
432 r'(?:www\.)?piped\.aeong\.one',
05799a48
RH
433 r'(?:www\.)?piped\.moomoo\.me',
434 r'(?:www\.)?piped\.chauvet\.pro',
435 r'(?:www\.)?watch\.leptons\.xyz',
436 r'(?:www\.)?pd\.vern\.cc',
437 r'(?:www\.)?piped\.hostux\.net',
438 r'(?:www\.)?piped\.lunar\.icu',
78a78fa7
BG
439 # Hyperpipe instances from https://hyperpipe.codeberg.page/
440 r'(?:www\.)?hyperpipe\.surge\.sh',
441 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
442 r'(?:www\.)?listen\.whatever\.social',
443 r'(?:www\.)?music\.adminforge\.de',
d9190e44
RH
444 )
445
c26f9b99 446 # extracted from account/account_menu ep
447 # XXX: These are the supported YouTube UI and API languages,
448 # which is slightly different from languages supported for translation in YouTube studio
449 _SUPPORTED_LANG_CODES = [
450 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
451 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
452 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
453 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
454 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
455 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
456 ]
457
a057779d 458 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
459
7666b936 460 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
461 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
462
def ucid_or_none(self, ucid):
    """Return ``ucid`` if the whole string matches ``_YT_CHANNEL_UCID_RE``, else ``None``."""
    anchored_pattern = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_pattern, ucid, 'UC-id', default=None)
465
def handle_or_none(self, handle):
    """Return ``handle`` if the whole string matches ``_YT_HANDLE_RE``, else ``None``."""
    anchored_pattern = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_pattern, handle, '@-handle', default=None)
468
def handle_from_url(self, url):
    """Extract the ``@handle`` that starts the path of a youtube.com URL.

    The scheme and host are optional, so a bare ``/@handle`` path is accepted
    too. Returns ``None`` when nothing matches.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(pattern, url, 'channel handle', default=None)
472
def ucid_from_url(self, url):
    """Extract the text matching ``_YT_CHANNEL_UCID_RE`` that starts the URL path.

    The scheme and host are optional, so a bare ``/UC...`` path is accepted
    too. Returns ``None`` when nothing matches.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(pattern, url, 'channel id', default=None)
476
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError (expected) when the configured code is not listed in
    _SUPPORTED_LANG_CODES; warns when a language other than "en" is chosen.
    """
    configured = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    preferred_lang = configured[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
494
def _initialize_consent(self):
    """Set an accepting ``CONSENT`` cookie for youtube.com unless one is unnecessary.

    Nothing is done when a ``__Secure-3PSID`` cookie exists or the current
    ``CONSENT`` cookie already contains ``YES``. Otherwise the id found after
    ``PENDING+`` in the existing cookie is reused, falling back to a random
    three-digit number.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 509
def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so language and timezone are pinned.

    Existing values of the user's ``PREF`` cookie are kept when they can be
    parsed; ``hl`` is forced to the preferred language (default ``en``) and
    ``tz`` to ``UTC``.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    language = self._preferred_lang or 'en'
    settings['hl'] = language
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
f3aa3c3f 521
def _real_initialize(self):
    """Run the extractor's setup steps in order: PREF cookie, consent cookie, login check."""
    # Pin language/timezone first
    self._initialize_pref()
    # Then make sure a consent cookie is in place
    self._initialize_consent()
    # Finally fail early if this extractor needs cookies that were not passed
    self._check_login_required()
526
527 def _check_login_required(self):
24146491 528 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 529 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 530
b7c47b74 531 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
532 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 533
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for ``client``, so callers may mutate it."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 536
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` entry of the built-in config for ``client``."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return builtin_cfg['INNERTUBE_HOST']
109dd3b2 539
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """Look ``getter`` up in ``ytcfg``, falling back to the default client config.

    try_get but with fallback to default ytcfg client values when present.
    The fallback is used whenever the value from ``ytcfg`` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
544
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg`` (or the default client config) as a string."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 549
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg`` (or the default client config) as a string."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 554
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname to use.

    Precedence: the ``innertube_host`` configuration argument, then
    ``req_api_hostname``, then the built-in host of ``default_client``
    (``web`` when no default client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
558
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)
109dd3b2 561
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict to send with API requests.

    The context is taken from ``ytcfg`` when it provides one, otherwise from
    a copy of the default client config. Its ``client`` sub-dict (if it is a
    dict) is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    language = self._preferred_lang or 'en'
    client_context['hl'] = language
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
569
cf87314d 570 _SAPISID = None
571
109dd3b2 572 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 573 time_now = round(time.time())
cf87314d 574 if self._SAPISID is None:
575 yt_cookies = self._get_cookies('https://www.youtube.com')
576 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
577 # See: https://github.com/yt-dlp/yt-dlp/issues/393
578 sapisid_cookie = dict_get(
579 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
580 if sapisid_cookie and sapisid_cookie.value:
581 self._SAPISID = sapisid_cookie.value
582 self.write_debug('Extracted SAPISID cookie')
583 # SAPISID cookie is required if not already present
584 if not yt_cookies.get('SAPISID'):
585 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
586 self._set_cookie(
587 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
588 else:
589 self._SAPISID = False
590 if not self._SAPISID:
591 return None
1974e99f 592 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
593 sapisidhash = hashlib.sha1(
86e5f3ed 594 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 595 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
596
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    ``context`` defaults to the context extracted for ``default_client``;
    ``query`` entries are merged on top of it in the request body. Extra
    ``headers`` override the generated API headers. The ``innertube_key``
    configuration argument takes precedence over ``api_key``, which in turn
    takes precedence over the default client's key.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 614
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON assigned to ``ytInitialData`` and return it parsed."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 617
99e9e001 618 @staticmethod
619 def _extract_session_index(*data):
620 """
621 Index of current account in account list.
622 See: https://github.com/yt-dlp/yt-dlp/pull/519
623 """
624 for ytcfg in data:
625 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
626 if session_index is not None:
627 return session_index
628
629 # Deprecated?
630 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 631 if ytcfg:
14f25df2 632 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
633 if token:
634 return token
99e9e001 635 if webpage:
636 return self._search_regex(
637 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
638 'identity token', default=None, fatal=False)
a1c5d2ca
M
639
640 @staticmethod
fe93e2c4 641 def _extract_account_syncid(*args):
8ea3f7b9 642 """
643 Extract syncId required to download private playlists of secondary channels
fe93e2c4 644 @params response and/or ytcfg
8ea3f7b9 645 """
fe93e2c4 646 for data in args:
647 # ytcfg includes channel_syncid if on secondary channel
14f25df2 648 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
fe93e2c4 649 if delegated_sid:
650 return delegated_sid
651 sync_ids = (try_get(
652 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
14f25df2 653 lambda x: x['DATASYNC_ID']), str) or '').split('||')
fe93e2c4 654 if len(sync_ids) >= 2 and sync_ids[1]:
655 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
656 # and just "user_syncid||" for primary channel. We only want the channel_syncid
657 return sync_ids[0]
a1c5d2ca 658
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried for each argument
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
ac56cf38 668
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once per instance."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
672
11f9be09 673 def extract_ytcfg(self, video_id, webpage):
8c54a305 674 if not webpage:
675 return {}
29f7c58a 676 return self._parse_json(
677 self._search_regex(
678 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 679 default='{}'), video_id, fatal=False) or {}
680
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an innertube API request.

    Explicitly passed values win over those extracted from ``ytcfg``.
    ``X-Goog-AuthUser`` is only added when a session index is known or an
    account sync id was passed (defaulting to 0 in the latter case).
    ``Authorization``/``X-Origin`` are only added when a SAPISIDHASH can be
    generated. The result is passed through ``filter_dict``.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    headers = {}
    headers['X-YouTube-Client-Name'] = str(self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client))
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)
    headers['User-Agent'] = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        headers['X-Goog-AuthUser'] = 0

    sapisidhash = self._generate_sapisidhash_header(origin)
    if sapisidhash is not None:
        headers['Authorization'] = sapisidhash
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 706
def _download_ytcfg(self, client, video_id):
    """Download the page carrying ``client``'s config and return its parsed ytcfg.

    Only ``web``, ``web_music`` and ``web_embedded`` have a known page; an
    empty dict is returned for any other client or when nothing is extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    progress_note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=progress_note)
    return self.extract_ytcfg(video_id, webpage) or {}
718
2d6659b9 719 @staticmethod
720 def _build_api_continuation_query(continuation, ctp=None):
721 query = {
722 'continuation': continuation
723 }
724 # TODO: Inconsistency with clickTrackingParams.
725 # Currently we have a fixed ctp contained within context (from ytcfg)
726 # and a ctp in root query for continuation.
727 if ctp:
728 query['clickTracking'] = {'clickTrackingParams': ctp}
729 return query
730
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's next/reload continuation data.

    Returns None when the renderer carries no such data or no continuation token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict) or {}
    token = continuation_data.get('continuation')
    if token:
        return cls._build_api_continuation_query(
            token, continuation_data.get('clickTrackingParams'))
    return None
2d6659b9 743
744 @classmethod
745 def _extract_continuation_ep_data(cls, continuation_ep: dict):
746 if isinstance(continuation_ep, dict):
747 continuation = try_get(
14f25df2 748 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 749 if not continuation:
750 return
751 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 752 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 753
@classmethod
def _extract_continuation(cls, renderer):
    """Return a continuation query for ``renderer``.

    The renderer's own next/reload continuation data is preferred; otherwise
    the ``continuationItemRenderer`` entries under its contents/items/rows are
    searched for a usable endpoint.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 764
@classmethod
def _extract_alerts(cls, data):
    """Yield (alert type, message text) pairs found in data['alerts'].

    Entries of the alerts list that are not dicts are skipped, as are
    alerts without a type or without any extractable text.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                kind = alert.get('type')
                if kind:
                    text = cls._get_text(alert, 'text')
                    if text:
                        yield kind, text
777
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise on the last fatal error.

    @param alerts: iterable of (alert type, message) pairs
    @param expected: passed through to the raised ExtractorError
    @param fatal: when true, alerts of type 'error' (case-insensitive) are
                  treated as errors; otherwise they are handled as warnings
    @param only_once: passed through to report_warning

    Messages listed in self._IGNORED_WARNINGS are dropped from the warnings.
    All warnings and all errors except the last are reported as warnings;
    the last error is raised as an ExtractorError.
    """
    fatal_errors, notices = [], []
    for kind, text in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_errors.append([kind, text])
        elif text not in self._IGNORED_WARNINGS:
            notices.append([kind, text])

    # Everything but the final error is only worth a warning
    to_warn = notices + fatal_errors[:-1]
    for alert_type, alert_message in to_warn:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if not fatal_errors:
        return
    last_message = fatal_errors[-1][1]
    raise ExtractorError('YouTube said: %s' % last_message, expected=expected)
790
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from data and hand them to _report_alerts.

    Extra positional and keyword arguments are forwarded to _report_alerts.
    """
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)
793
def _extract_badges(self, renderer: dict):
    """Collect the recognised badges of a renderer.

    Every renderer['badges'][...]['metadataBadgeRenderer'] entry is mapped to
    a BadgeType using, in order of preference, its icon type, its style and
    finally a substring match on its (English) label.

    @param renderer: renderer dict that may contain a 'badges' list
    @returns: list of {'type': BadgeType} dicts, one per recognised badge
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # badge_type is always falsy on this path, so the type has to
                # come from the matched label; one badge yields one entry
                badges.append({'type': label_badge_type})
                break

    return badges
833
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether any entry of badges has the given badge type."""
    def is_requested_type(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_requested_type)
    return bool(matching)
837
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths.

    @param data: object to search; used directly when no path is given
    @param path_list: traverse_obj paths, tried in order
    @param max_runs: when truthy, at most this many 'runs' are joined

    For each candidate object a non-empty 'simpleText' string wins;
    otherwise the 'text' strings of its 'runs' (or of the candidate itself
    when it is a list and has no runs) are concatenated. None is returned
    when nothing produced any text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths already produce a list of results
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not runs:
                runs = candidate

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined_text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined_text:
                return joined_text
    return None
47193e02 859
def _get_count(self, data, *path_list):
    """Extract a count from the text found at the given paths.

    The text is first handed to parse_count; if that gives None, the leading
    run of digits and commas (after removing all whitespace) is converted
    with str_to_int instead.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', raw_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))
867
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Build the list of thumbnail dicts ('url', 'height', 'width') from data
    @param path_list: paths to the levels that contain a 'thumbnails' key;
                      data itself is used when no path is given
    """
    collected = []
    for base_path in path_list or [()]:
        search_path = (*variadic(base_path), 'thumbnails', ...)
        for entry in traverse_obj(data, search_path):
            image_url = url_or_none(entry.get('url'))
            if not image_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in image_url:
                image_url = image_url.partition('?')[0]
            collected.append({
                'url': image_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
891
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time in a string and convert it with datetime_from_str
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
    Returns None when the string holds no relative time, or when converting
    an '<amount> <unit> ago' expression raises ValueError
    """

    # XXX: this could be moved to a general function in utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not found:
        return None

    anchor = found.group('start')
    if anchor:
        return datetime_from_str(anchor)

    offset_expr = 'now-%s%s' % (found.group('time'), found.group('unit'))
    try:
        return datetime_from_str(offset_expr)
    except ValueError:
        return None
914
def _parse_time_text(self, text):
    """Convert a time text into a timestamp.

    A relative time ('3 days ago', 'yesterday', ...) is tried first, then
    unified_timestamp on the whole text, then unified_timestamp on a date-like
    part cut out of the lowercased text. Returns None for empty text or when
    nothing could be parsed; in the latter case a warning is reported once
    if the preferred language is unset or English.
    """
    if not text:
        return None

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        return calendar.timegm(relative_dt.timetuple())

    date_patterns = (
        r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
        r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
    )
    timestamp = unified_timestamp(text) or unified_timestamp(
        self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
f3aa3c3f 934
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return a response that passed all checks.

    An attempt is retried (through self.RetryManager) when
      - the request failed with a network error that is not an HTTP error,
      - the request failed with an HTTP error other than 403 or 429,
      - the response carries an alert containing 'unknown error',
      - nothing is found in the response at check_get_keys.
    Any other ExtractorError is passed to self._error_or_warning together
    with `fatal`, and that call's result is returned.
    """
    for attempt in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                attempt.error = err
                continue

            # Surface the error message of a non-HTML (JSON) error body as a warning
            head = cause.read(512)
            if not is_html(head):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                api_message = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            attempt.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            attempt.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        attempt.error = ExtractorError('Incomplete data received', expected=True)
109dd3b2 986
9297939e 987 @staticmethod
988 def is_music_url(url):
5b28cef7 989 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
9297939e 990
def _extract_video(self, renderer):
    """Build a 'url' result for a video renderer.

    Metadata is read from the renderer itself and, where the renderer lacks
    it, from the reel player header nested in its navigation endpoint.
    The returned dict points at the watch URL, or at the shorts URL when
    the overlay style or the navigation URL marks the video as a short.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    raw_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not raw_channel_id:
        raw_channel_id = traverse_obj(
            reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(raw_channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    relative_navigation_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_navigation_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The relative time text only gives an approximate date, so it is opt-in
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status
    }
1084
0c148415 1085
360e1ca5 1086class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1087 IE_DESC = 'YouTube'
cb7dfeea 1088 _VALID_URL = r"""(?x)^
c5e8d7af 1089 (
edb53e2d 1090 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1091 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1092 (?:www\.)?deturl\.com/www\.youtube\.com|
1093 (?:www\.)?pwnyoutube\.com|
1094 (?:www\.)?hooktube\.com|
1095 (?:www\.)?yourepeat\.com|
1096 tube\.majestyc\.net|
1097 %(invidious)s|
1098 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1099 (?:.*?\#/)? # handle anchor (#/) redirect urls
1100 (?: # the various things that can precede the ID:
dad2210c 1101 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1102 |(?: # or the v= param in all its forms
f7000f3a 1103 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1104 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1105 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1106 v=
1107 )
f4b05232 1108 ))
cbaed4bb
S
1109 |(?:
1110 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1111 vid\.plus| # or vid.plus/xxxx
1112 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1113 %(invidious)s
cbaed4bb 1114 )/
edb53e2d 1115 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1116 )
c5e8d7af 1117 )? # all until now is optional -> you can pass the naked ID
201c1459 1118 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1119 (?(1).+)? # if we found the ID, everything can follow
9297939e 1120 (?:\#|$)""" % {
d9190e44 1121 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1122 }
7c6eb424 1123 _EMBED_REGEX = [
1124 r'''(?x)
1125 (?:
0ca0f881 1126 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1127 data-video-url=|
1128 <embed[^>]+?src=|
1129 embedSWF\(?:\s*|
1130 <object[^>]+data=|
1131 new\s+SWFObject\(
1132 )
1133 (["\'])
1134 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1135 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1136 \1''',
1137 # https://wordpress.org/plugins/lazy-load-for-videos/
1138 r'''(?xs)
1139 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1140 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1141 ]
6368e2e6 1142 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1143
e40c758c 1144 _PLAYER_INFO_RE = (
cc2db878 1145 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1146 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1147 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1148 )
2c62dc26 1149 _formats = {
c2d3cb4c 1150 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1151 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1152 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1153 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1154 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1155 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1156 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1157 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1158 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1159 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1160 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1161 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1162 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1163 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1164 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1165 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1166 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1167 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1168
1169
1170 # 3D videos
c2d3cb4c 1171 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1172 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1173 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1174 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1175 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1176 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1177 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1178
96fb5605 1179 # Apple HTTP Live Streaming
11f12195 1180 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1181 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1182 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1183 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1184 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1185 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1186 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1187 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1188
1189 # DASH mp4 video
d23028a8
S
1190 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1191 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1192 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1193 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1194 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1195 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1196 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1197 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1198 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1199 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1200 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1201 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1202
f6f1fc92 1203 # Dash mp4 audio
d23028a8
S
1204 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1205 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1206 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1207 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1208 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1209 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1210 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1211
1212 # Dash webm
d23028a8
S
1213 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1214 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1215 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1216 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1217 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1218 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1219 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1220 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1221 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1222 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1223 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1224 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1225 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1226 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1227 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1228 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1229 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1230 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1231 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1232 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1233 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1234 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1235
1236 # Dash webm audio
d23028a8
S
1237 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1238 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1239
0857baad 1240 # Dash webm audio with opus inside
d23028a8
S
1241 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1242 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1243 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1244
ce6b9a2d
PH
1245 # RTMP (unnamed)
1246 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1247
1248 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1249 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1250 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1251 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1252 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1253 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1254 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1255 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1256 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1257 }
29f7c58a 1258 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1259
fd5c4aab
S
1260 _GEO_BYPASS = False
1261
78caa52a 1262 IE_NAME = 'youtube'
2eb88d95
PH
1263 _TESTS = [
1264 {
2d3d2997 1265 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1266 'info_dict': {
1267 'id': 'BaW_jenozKc',
1268 'ext': 'mp4',
3867038a 1269 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
ff9f925b 1270 'channel': 'Philipp Hagemeister',
dd4c4492
S
1271 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1272 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1273 'upload_date': '20121002',
ff9f925b 1274 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1275 'categories': ['Science & Technology'],
3867038a 1276 'tags': ['youtube-dl'],
556dbe7f 1277 'duration': 10,
dbdaaa23 1278 'view_count': int,
3e7c1224 1279 'like_count': int,
ff9f925b 1280 'availability': 'public',
1281 'playable_in_embed': True,
1282 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1283 'live_status': 'not_live',
1284 'age_limit': 0,
7c80519c 1285 'start_time': 1,
297a564b 1286 'end_time': 9,
12a1b225 1287 'comment_count': int,
7666b936 1288 'channel_follower_count': int,
1289 'uploader': 'Philipp Hagemeister',
1290 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1291 'uploader_id': '@PhilippHagemeister',
5caf30db 1292 'heatmap': 'count:100',
2eb88d95 1293 }
0e853ca4 1294 },
fccd3771 1295 {
4bc3a23e
PH
1296 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1297 'note': 'Embed-only video (#1746)',
1298 'info_dict': {
1299 'id': 'yZIXLfi8CZQ',
1300 'ext': 'mp4',
1301 'upload_date': '20120608',
1302 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1303 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
94bfcd23 1304 'age_limit': 18,
545cc85d 1305 },
1306 'skip': 'Private video',
fccd3771 1307 },
11b56058 1308 {
8bdd16b4 1309 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1310 'note': 'Use the first video ID in the URL',
1311 'info_dict': {
1312 'id': 'BaW_jenozKc',
1313 'ext': 'mp4',
3867038a 1314 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976ae3ea 1315 'channel': 'Philipp Hagemeister',
1316 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1317 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1318 'upload_date': '20121002',
976ae3ea 1319 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1320 'categories': ['Science & Technology'],
3867038a 1321 'tags': ['youtube-dl'],
556dbe7f 1322 'duration': 10,
dbdaaa23 1323 'view_count': int,
11b56058 1324 'like_count': int,
976ae3ea 1325 'availability': 'public',
1326 'playable_in_embed': True,
1327 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1328 'live_status': 'not_live',
1329 'age_limit': 0,
12a1b225 1330 'comment_count': int,
7666b936 1331 'channel_follower_count': int,
1332 'uploader': 'Philipp Hagemeister',
1333 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1334 'uploader_id': '@PhilippHagemeister',
34a7de29
S
1335 },
1336 'params': {
1337 'skip_download': True,
1338 },
11b56058 1339 },
dd27fd17 1340 {
2d3d2997 1341 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1342 'note': '256k DASH audio (format 141) via DASH manifest',
1343 'info_dict': {
1344 'id': 'a9LDPn-MO4I',
1345 'ext': 'm4a',
1346 'upload_date': '20121002',
4bc3a23e 1347 'description': '',
4bc3a23e 1348 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1349 },
4bc3a23e
PH
1350 'params': {
1351 'youtube_include_dash_manifest': True,
1352 'format': '141',
4919603f 1353 },
de3c7fe0 1354 'skip': 'format 141 not served anymore',
dd27fd17 1355 },
8bdd16b4 1356 # DASH manifest with encrypted signature
1357 {
1358 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1359 'info_dict': {
1360 'id': 'IB3lcPjvWLA',
1361 'ext': 'm4a',
1362 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1363 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1364 'duration': 244,
8bdd16b4 1365 'upload_date': '20131011',
cc2db878 1366 'abr': 129.495,
976ae3ea 1367 'like_count': int,
1368 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1369 'playable_in_embed': True,
1370 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1371 'view_count': int,
1372 'track': 'The Spark',
1373 'live_status': 'not_live',
1374 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1375 'channel': 'Afrojack',
976ae3ea 1376 'tags': 'count:19',
1377 'availability': 'public',
1378 'categories': ['Music'],
1379 'age_limit': 0,
1380 'alt_title': 'The Spark',
7666b936 1381 'channel_follower_count': int,
1382 'uploader': 'Afrojack',
1383 'uploader_url': 'https://www.youtube.com/@Afrojack',
1384 'uploader_id': '@Afrojack',
8bdd16b4 1385 },
1386 'params': {
1387 'youtube_include_dash_manifest': True,
1388 'format': '141/bestaudio[ext=m4a]',
1389 },
1390 },
65c2fde2 1391 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1392 {
65c2fde2 1393 'note': 'Embed allowed age-gate video',
2d3d2997 1394 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1395 'info_dict': {
1396 'id': 'HtVdAasjOgU',
1397 'ext': 'mp4',
1398 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1399 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1400 'duration': 142,
c522adb1 1401 'upload_date': '20140605',
34952f09 1402 'age_limit': 18,
976ae3ea 1403 'categories': ['Gaming'],
1404 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1405 'availability': 'needs_auth',
1406 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1407 'like_count': int,
1408 'channel': 'The Witcher',
1409 'live_status': 'not_live',
1410 'tags': 'count:17',
1411 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1412 'playable_in_embed': True,
1413 'view_count': int,
7666b936 1414 'channel_follower_count': int,
1415 'uploader': 'The Witcher',
1416 'uploader_url': 'https://www.youtube.com/@thewitcher',
1417 'uploader_id': '@thewitcher',
c522adb1
JMF
1418 },
1419 },
65c2fde2 1420 {
1421 'note': 'Age-gate video with embed allowed in public site',
1422 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1423 'info_dict': {
1424 'id': 'HsUATh_Nc2U',
1425 'ext': 'mp4',
1426 'title': 'Godzilla 2 (Official Video)',
1427 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1428 'upload_date': '20200408',
65c2fde2 1429 'age_limit': 18,
976ae3ea 1430 'availability': 'needs_auth',
1431 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
976ae3ea 1432 'channel': 'FlyingKitty',
1433 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1434 'view_count': int,
1435 'categories': ['Entertainment'],
1436 'live_status': 'not_live',
1437 'tags': ['Flyingkitty', 'godzilla 2'],
1438 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1439 'like_count': int,
1440 'duration': 177,
1441 'playable_in_embed': True,
7666b936 1442 'channel_follower_count': int,
1443 'uploader': 'FlyingKitty',
1444 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1445 'uploader_id': '@FlyingKitty900',
5caf30db 1446 'comment_count': int,
65c2fde2 1447 },
1448 },
1449 {
1450 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1451 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1452 'info_dict': {
1453 'id': 'Tq92D6wQ1mg',
1454 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1455 'ext': 'mp4',
17322130 1456 'upload_date': '20191228',
65c2fde2 1457 'description': 'md5:17eccca93a786d51bc67646756894066',
1458 'age_limit': 18,
976ae3ea 1459 'like_count': int,
1460 'availability': 'needs_auth',
976ae3ea 1461 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1462 'view_count': int,
1463 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1464 'channel': 'Projekt Melody',
1465 'live_status': 'not_live',
1466 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1467 'playable_in_embed': True,
1468 'categories': ['Entertainment'],
1469 'duration': 106,
1470 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1471 'comment_count': int,
7666b936 1472 'channel_follower_count': int,
1473 'uploader': 'Projekt Melody',
1474 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1475 'uploader_id': '@ProjektMelody',
65c2fde2 1476 },
1477 },
1478 {
1479 'note': 'Non-Agegated non-embeddable video',
1480 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1481 'info_dict': {
1482 'id': 'MeJVWBSsPAY',
1483 'ext': 'mp4',
1484 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
65c2fde2 1485 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1486 'upload_date': '20130730',
976ae3ea 1487 'track': 'Such mich find mich',
1488 'age_limit': 0,
1489 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1490 'like_count': int,
1491 'playable_in_embed': False,
1492 'creator': 'OOMPH!',
1493 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1494 'view_count': int,
1495 'alt_title': 'Such mich find mich',
1496 'duration': 210,
1497 'channel': 'Herr Lurik',
1498 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1499 'categories': ['Music'],
1500 'availability': 'public',
976ae3ea 1501 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1502 'live_status': 'not_live',
1503 'artist': 'OOMPH!',
7666b936 1504 'channel_follower_count': int,
1505 'uploader': 'Herr Lurik',
1506 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1507 'uploader_id': '@HerrLurik',
65c2fde2 1508 },
1509 },
1510 {
1511 'note': 'Non-bypassable age-gated video',
1512 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1513 'only_matching': True,
1514 },
8bdd16b4 1515 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1516 # YouTube Red ad is not captured for creator
1517 {
1518 'url': '__2ABJjxzNo',
1519 'info_dict': {
1520 'id': '__2ABJjxzNo',
1521 'ext': 'mp4',
1522 'duration': 266,
1523 'upload_date': '20100430',
545cc85d 1524 'creator': 'deadmau5',
1525 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1526 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1527 'alt_title': 'Some Chords',
976ae3ea 1528 'availability': 'public',
1529 'tags': 'count:14',
1530 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1531 'view_count': int,
1532 'live_status': 'not_live',
1533 'channel': 'deadmau5',
1534 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1535 'like_count': int,
1536 'track': 'Some Chords',
1537 'artist': 'deadmau5',
1538 'playable_in_embed': True,
1539 'age_limit': 0,
1540 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1541 'categories': ['Music'],
1542 'album': 'Some Chords',
7666b936 1543 'channel_follower_count': int,
1544 'uploader': 'deadmau5',
1545 'uploader_url': 'https://www.youtube.com/@deadmau5',
1546 'uploader_id': '@deadmau5',
8bdd16b4 1547 },
1548 'expected_warnings': [
1549 'DASH manifest missing',
1550 ]
1551 },
067aa17e 1552 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1553 {
1554 'url': 'lqQg6PlCWgI',
1555 'info_dict': {
1556 'id': 'lqQg6PlCWgI',
1557 'ext': 'mp4',
556dbe7f 1558 'duration': 6085,
90227264 1559 'upload_date': '20150827',
12a1b225 1560 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
cbe2bd91 1561 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1562 'like_count': int,
1563 'release_timestamp': 1343767800,
1564 'playable_in_embed': True,
1565 'categories': ['Sports'],
1566 'release_date': '20120731',
1567 'channel': 'Olympics',
1568 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1569 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1570 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1571 'age_limit': 0,
1572 'availability': 'public',
1573 'live_status': 'was_live',
1574 'view_count': int,
1575 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
7666b936 1576 'channel_follower_count': int,
1577 'uploader': 'Olympics',
1578 'uploader_url': 'https://www.youtube.com/@Olympics',
1579 'uploader_id': '@Olympics',
cbe2bd91
PH
1580 },
1581 'params': {
1582 'skip_download': 'requires avconv',
e52a40ab 1583 }
cbe2bd91 1584 },
6271f1ca
PH
1585 # Non-square pixels
1586 {
1587 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1588 'info_dict': {
1589 'id': '_b-2C3KPAM0',
1590 'ext': 'mp4',
1591 'stretched_ratio': 16 / 9.,
556dbe7f 1592 'duration': 85,
6271f1ca 1593 'upload_date': '20110310',
6271f1ca 1594 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
6271f1ca 1595 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1596 'playable_in_embed': True,
1597 'channel': '孫ᄋᄅ',
1598 'age_limit': 0,
1599 'tags': 'count:11',
1600 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1601 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1602 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1603 'view_count': int,
1604 'categories': ['People & Blogs'],
1605 'like_count': int,
1606 'live_status': 'not_live',
1607 'availability': 'unlisted',
12a1b225 1608 'comment_count': int,
7666b936 1609 'channel_follower_count': int,
1610 'uploader': '孫ᄋᄅ',
1611 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1612 'uploader_id': '@AllenMeow',
6271f1ca 1613 },
06b491eb
S
1614 },
1615 # url_encoded_fmt_stream_map is empty string
1616 {
1617 'url': 'qEJwOuvDf7I',
1618 'info_dict': {
1619 'id': 'qEJwOuvDf7I',
f57b7835 1620 'ext': 'webm',
06b491eb
S
1621 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1622 'description': '',
1623 'upload_date': '20150404',
06b491eb
S
1624 },
1625 'params': {
1626 'skip_download': 'requires avconv',
e323cf3f
S
1627 },
1628 'skip': 'This live event has ended.',
06b491eb 1629 },
067aa17e 1630 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1631 {
1632 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1633 'info_dict': {
1634 'id': 'FIl7x6_3R5Y',
eb6793ba 1635 'ext': 'webm',
da77d856
S
1636 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1637 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1638 'duration': 220,
da77d856 1639 'upload_date': '20150625',
eb6793ba 1640 'formats': 'mincount:31',
da77d856 1641 },
eb6793ba 1642 'skip': 'not actual anymore',
2ee8f5d8 1643 },
8a1a26ce
YCH
1644 # DASH manifest with segment_list
1645 {
1646 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1647 'md5': '8ce563a1d667b599d21064e982ab9e31',
1648 'info_dict': {
1649 'id': 'CsmdDsKjzN8',
1650 'ext': 'mp4',
17ee98e1 1651 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce 1652 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
8a1a26ce
YCH
1653 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1654 },
1655 'params': {
1656 'youtube_include_dash_manifest': True,
1657 'format': '135', # bestvideo
be49068d
S
1658 },
1659 'skip': 'This live event has ended.',
2ee8f5d8 1660 },
cf7e015f 1661 {
6368e2e6 1662 # Multifeed videos (multiple cameras); the URL can be that of any camera
7666b936 1663 # TODO: fix multifeed titles
6368e2e6 1664 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1665 'info_dict': {
6368e2e6 1666 'id': 'zaPI8MvL8pg',
1667 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1668 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1669 },
1670 'playlist': [{
1671 'info_dict': {
6368e2e6 1672 'id': 'j5yGuxZ8lLU',
cf7e015f 1673 'ext': 'mp4',
6368e2e6 1674 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
6368e2e6 1675 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1676 'duration': 10120,
1677 'channel_follower_count': int,
1678 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1679 'availability': 'public',
1680 'playable_in_embed': True,
1681 'upload_date': '20131105',
6368e2e6 1682 'categories': ['Gaming'],
1683 'live_status': 'was_live',
1684 'tags': 'count:24',
1685 'release_timestamp': 1383701910,
1686 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1687 'comment_count': int,
1688 'age_limit': 0,
1689 'like_count': int,
1690 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1691 'channel': 'WiiLikeToPlay',
1692 'view_count': int,
1693 'release_date': '20131106',
7666b936 1694 'uploader': 'WiiLikeToPlay',
1695 'uploader_id': '@WLTP',
1696 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1697 },
1698 }, {
1699 'info_dict': {
6368e2e6 1700 'id': 'zaPI8MvL8pg',
cf7e015f 1701 'ext': 'mp4',
6368e2e6 1702 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
6368e2e6 1703 'availability': 'public',
1704 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1705 'channel': 'WiiLikeToPlay',
6368e2e6 1706 'channel_follower_count': int,
1707 'description': 'md5:563ccbc698b39298481ca3c571169519',
1708 'duration': 10108,
1709 'age_limit': 0,
1710 'like_count': int,
1711 'tags': 'count:24',
1712 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
6368e2e6 1713 'release_timestamp': 1383701915,
1714 'comment_count': int,
1715 'upload_date': '20131105',
1716 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1717 'release_date': '20131106',
1718 'playable_in_embed': True,
1719 'live_status': 'was_live',
1720 'categories': ['Gaming'],
1721 'view_count': int,
7666b936 1722 'uploader': 'WiiLikeToPlay',
1723 'uploader_id': '@WLTP',
1724 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1725 },
1726 }, {
1727 'info_dict': {
6368e2e6 1728 'id': 'R7r3vfO7Hao',
cf7e015f 1729 'ext': 'mp4',
6368e2e6 1730 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1731 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1732 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1733 'like_count': int,
1734 'availability': 'public',
1735 'playable_in_embed': True,
1736 'upload_date': '20131105',
1737 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1738 'channel_follower_count': int,
1739 'tags': 'count:24',
1740 'release_date': '20131106',
6368e2e6 1741 'comment_count': int,
1742 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1743 'channel': 'WiiLikeToPlay',
1744 'categories': ['Gaming'],
1745 'release_timestamp': 1383701914,
1746 'live_status': 'was_live',
1747 'age_limit': 0,
1748 'duration': 10128,
1749 'view_count': int,
7666b936 1750 'uploader': 'WiiLikeToPlay',
1751 'uploader_id': '@WLTP',
1752 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1753 },
1754 }],
6368e2e6 1755 'params': {'skip_download': True},
cbaed4bb 1756 },
f9f49d87 1757 {
067aa17e 1758 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1759 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1760 'info_dict': {
1761 'id': 'gVfLd0zydlo',
1762 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1763 },
1764 'playlist_count': 2,
be49068d 1765 'skip': 'Not multifeed anymore',
f9f49d87 1766 },
cbaed4bb 1767 {
2d3d2997 1768 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1769 'only_matching': True,
0e49d9a6 1770 },
6d4fc66b 1771 {
2d3d2997 1772 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1773 'only_matching': True,
1774 },
0e49d9a6 1775 {
067aa17e 1776 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1777 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1778 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1779 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1780 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1781 'info_dict': {
1782 'id': 'lsguqyKfVQg',
1783 'ext': 'mp4',
1784 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1785 'alt_title': 'Dark Walk',
0e49d9a6 1786 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1787 'duration': 133,
0e49d9a6 1788 'upload_date': '20151119',
11f9be09 1789 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1790 'track': 'Dark Walk',
1791 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1792 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1793 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1794 'categories': ['Film & Animation'],
1795 'view_count': int,
1796 'live_status': 'not_live',
1797 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1798 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1799 'tags': 'count:13',
1800 'availability': 'public',
1801 'channel': 'IronSoulElf',
1802 'playable_in_embed': True,
1803 'like_count': int,
1804 'age_limit': 0,
6c73052c 1805 'channel_follower_count': int
0e49d9a6
LL
1806 },
1807 'params': {
1808 'skip_download': True,
1809 },
1810 },
61f92af1 1811 {
067aa17e 1812 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1813 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1814 'only_matching': True,
1815 },
313dfc45
LL
1816 {
1817 # Video with yt:stretch=17:0
1818 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1819 'info_dict': {
1820 'id': 'Q39EVAstoRM',
1821 'ext': 'mp4',
1822 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1823 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1824 'upload_date': '20151107',
313dfc45
LL
1825 },
1826 'params': {
1827 'skip_download': True,
1828 },
be49068d 1829 'skip': 'This video does not exist.',
313dfc45 1830 },
201c1459 1831 {
1832 # Video with incomplete 'yt:stretch=16:'
1833 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1834 'only_matching': True,
1835 },
7caf9830
S
1836 {
1837 # Video licensed under Creative Commons
1838 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1839 'info_dict': {
1840 'id': 'M4gD1WSo5mA',
1841 'ext': 'mp4',
1842 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1843 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1844 'duration': 721,
17322130 1845 'upload_date': '20150128',
7caf9830 1846 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1847 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1848 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1849 'like_count': int,
1850 'age_limit': 0,
1851 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1852 'channel': 'The Berkman Klein Center for Internet & Society',
1853 'availability': 'public',
1854 'view_count': int,
1855 'categories': ['Education'],
1856 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1857 'live_status': 'not_live',
1858 'playable_in_embed': True,
d5d1df8a 1859 'channel_follower_count': int,
1860 'chapters': list,
7666b936 1861 'uploader': 'The Berkman Klein Center for Internet & Society',
1862 'uploader_id': '@BKCHarvard',
1863 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
7caf9830
S
1864 },
1865 'params': {
1866 'skip_download': True,
1867 },
1868 },
fd050249 1869 {
fd050249
S
1870 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1871 'info_dict': {
1872 'id': 'eQcmzGIKrzg',
1873 'ext': 'mp4',
1874 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1875 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1876 'duration': 4060,
17322130 1877 'upload_date': '20151120',
fd050249 1878 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1879 'playable_in_embed': True,
1880 'tags': 'count:12',
1881 'like_count': int,
1882 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1883 'age_limit': 0,
1884 'availability': 'public',
1885 'categories': ['News & Politics'],
1886 'channel': 'Bernie Sanders',
1887 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1888 'view_count': int,
1889 'live_status': 'not_live',
1890 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1891 'comment_count': int,
d5d1df8a 1892 'channel_follower_count': int,
1893 'chapters': list,
7666b936 1894 'uploader': 'Bernie Sanders',
1895 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1896 'uploader_id': '@BernieSanders',
fd050249
S
1897 },
1898 'params': {
1899 'skip_download': True,
1900 },
1901 },
040ac686
S
1902 {
1903 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1904 'only_matching': True,
7f29cf54
S
1905 },
1906 {
067aa17e 1907 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1908 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1909 'only_matching': True,
6496ccb4
S
1910 },
1911 {
1912 # Rental video preview
1913 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1914 'info_dict': {
1915 'id': 'uGpuVWrhIzE',
1916 'ext': 'mp4',
1917 'title': 'Piku - Trailer',
1918 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1919 'upload_date': '20150811',
6496ccb4
S
1920 'license': 'Standard YouTube License',
1921 },
1922 'params': {
1923 'skip_download': True,
1924 },
eb6793ba 1925 'skip': 'This video is not available.',
022a5d66 1926 },
12afdc2a
S
1927 {
1928 # YouTube Red video with episode data
1929 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1930 'info_dict': {
1931 'id': 'iqKdEhx-dD4',
1932 'ext': 'mp4',
1933 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1934 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1935 'duration': 2085,
12afdc2a 1936 'upload_date': '20170118',
12afdc2a
S
1937 'series': 'Mind Field',
1938 'season_number': 1,
1939 'episode_number': 1,
976ae3ea 1940 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1941 'tags': 'count:12',
1942 'view_count': int,
1943 'availability': 'public',
1944 'age_limit': 0,
1945 'channel': 'Vsauce',
1946 'episode': 'Episode 1',
1947 'categories': ['Entertainment'],
1948 'season': 'Season 1',
1949 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1950 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1951 'like_count': int,
1952 'playable_in_embed': True,
1953 'live_status': 'not_live',
7666b936 1954 'channel_follower_count': int,
1955 'uploader': 'Vsauce',
1956 'uploader_url': 'https://www.youtube.com/@Vsauce',
1957 'uploader_id': '@Vsauce',
12afdc2a
S
1958 },
1959 'params': {
1960 'skip_download': True,
1961 },
1962 'expected_warnings': [
1963 'Skipping DASH manifest',
1964 ],
1965 },
c7121fa7
S
1966 {
1967 # The following content has been identified by the YouTube community
1968 # as inappropriate or offensive to some audiences.
1969 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1970 'info_dict': {
1971 'id': '6SJNVb0GnPI',
1972 'ext': 'mp4',
1973 'title': 'Race Differences in Intelligence',
1974 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1975 'duration': 965,
1976 'upload_date': '20140124',
c7121fa7
S
1977 },
1978 'params': {
1979 'skip_download': True,
1980 },
545cc85d 1981 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1982 },
022a5d66
S
1983 {
1984 # itag 212
1985 'url': '1t24XAntNCY',
1986 'only_matching': True,
fd5c4aab
S
1987 },
1988 {
1989 # geo restricted to JP
1990 'url': 'sJL6WA-aGkQ',
1991 'only_matching': True,
1992 },
cd5a74a2
S
1993 {
1994 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1995 'only_matching': True,
1996 },
bc2ca1bb 1997 {
1998 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1999 'only_matching': True,
2000 },
2001 {
2002 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2003 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2004 'only_matching': True,
2005 },
825cd268
RA
2006 {
2007 # DRM protected
2008 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2009 'only_matching': True,
4fe54c12
S
2010 },
2011 {
2012 # Video with unsupported adaptive stream type formats
2013 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2014 'info_dict': {
2015 'id': 'Z4Vy8R84T1U',
2016 'ext': 'mp4',
2017 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2018 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2019 'duration': 433,
2020 'upload_date': '20130923',
4fe54c12
S
2021 'formats': 'maxcount:10',
2022 },
2023 'params': {
2024 'skip_download': True,
2025 'youtube_include_dash_manifest': False,
2026 },
5429d6a9 2027 'skip': 'not actual anymore',
5caabd3c 2028 },
2029 {
822b9d9c 2030 # YouTube Music auto-generated description
7666b936 2031 # TODO: fix metadata extraction
5caabd3c 2032 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2033 'info_dict': {
2034 'id': 'MgNrAu2pzNs',
2035 'ext': 'mp4',
2036 'title': 'Voyeur Girl',
2037 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2038 'upload_date': '20190312',
5caabd3c 2039 'artist': 'Stephen',
2040 'track': 'Voyeur Girl',
2041 'album': 'it\'s too much love to know my dear',
2042 'release_date': '20190313',
2043 'release_year': 2019,
976ae3ea 2044 'alt_title': 'Voyeur Girl',
2045 'view_count': int,
976ae3ea 2046 'playable_in_embed': True,
2047 'like_count': int,
2048 'categories': ['Music'],
2049 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
7666b936 2050 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2051 'uploader': 'Stephen',
976ae3ea 2052 'availability': 'public',
2053 'creator': 'Stephen',
2054 'duration': 169,
2055 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2056 'age_limit': 0,
2057 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2058 'tags': 'count:11',
2059 'live_status': 'not_live',
6c73052c 2060 'channel_follower_count': int
5caabd3c 2061 },
2062 'params': {
2063 'skip_download': True,
2064 },
2065 },
66b48727
RA
2066 {
2067 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2068 'only_matching': True,
2069 },
011e75e6
S
2070 {
2071 # invalid -> valid video id redirection
2072 'url': 'DJztXj2GPfl',
2073 'info_dict': {
2074 'id': 'DJztXj2GPfk',
2075 'ext': 'mp4',
2076 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2077 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2078 'upload_date': '20090125',
011e75e6
S
2079 'artist': 'Panjabi MC',
2080 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2081 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2082 },
2083 'params': {
2084 'skip_download': True,
2085 },
545cc85d 2086 'skip': 'Video unavailable',
ea74e00b
DP
2087 },
2088 {
2089 # empty description results in an empty string
2090 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2091 'info_dict': {
2092 'id': 'x41yOUIvK2k',
2093 'ext': 'mp4',
2094 'title': 'IMG 3456',
2095 'description': '',
2096 'upload_date': '20170613',
976ae3ea 2097 'view_count': int,
2098 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
976ae3ea 2099 'like_count': int,
2100 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2101 'tags': [],
2102 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2103 'availability': 'public',
2104 'age_limit': 0,
2105 'categories': ['Pets & Animals'],
2106 'duration': 7,
2107 'playable_in_embed': True,
2108 'live_status': 'not_live',
7666b936 2109 'channel': 'l\'Or Vert asbl',
2110 'channel_follower_count': int,
2111 'uploader': 'l\'Or Vert asbl',
2112 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2113 'uploader_id': '@ElevageOrVert',
ea74e00b
DP
2114 },
2115 'params': {
2116 'skip_download': True,
2117 },
2118 },
a0566bbf 2119 {
29f7c58a 2120 # with '};' inside yt initial data (see [1])
2121 # see [2] for an example with '};' inside ytInitialPlayerResponse
2122 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2123 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2124 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2125 'info_dict': {
2126 'id': 'CHqg6qOn4no',
2127 'ext': 'mp4',
2128 'title': 'Part 77 Sort a list of simple types in c#',
2129 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2130 'upload_date': '20130831',
976ae3ea 2131 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2132 'like_count': int,
976ae3ea 2133 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2134 'live_status': 'not_live',
2135 'categories': ['Education'],
2136 'availability': 'public',
2137 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2138 'tags': 'count:12',
2139 'playable_in_embed': True,
2140 'age_limit': 0,
2141 'view_count': int,
2142 'duration': 522,
2143 'channel': 'kudvenkat',
12a1b225 2144 'comment_count': int,
d5d1df8a 2145 'channel_follower_count': int,
2146 'chapters': list,
7666b936 2147 'uploader': 'kudvenkat',
2148 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2149 'uploader_id': '@Csharp-video-tutorialsBlogspot',
a0566bbf 2150 },
2151 'params': {
2152 'skip_download': True,
2153 },
2154 },
29f7c58a 2155 {
2156 # another example of '};' in ytInitialData
2157 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2158 'only_matching': True,
2159 },
2160 {
2161 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2162 'only_matching': True,
2163 },
545cc85d 2164 {
cc2db878 2165 # https://github.com/ytdl-org/youtube-dl/pull/28094
2166 'url': 'OtqTfy26tG0',
2167 'info_dict': {
2168 'id': 'OtqTfy26tG0',
2169 'ext': 'mp4',
2170 'title': 'Burn Out',
2171 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2172 'upload_date': '20141120',
cc2db878 2173 'artist': 'The Cinematic Orchestra',
2174 'track': 'Burn Out',
2175 'album': 'Every Day',
976ae3ea 2176 'like_count': int,
2177 'live_status': 'not_live',
2178 'alt_title': 'Burn Out',
2179 'duration': 614,
2180 'age_limit': 0,
2181 'view_count': int,
2182 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2183 'creator': 'The Cinematic Orchestra',
2184 'channel': 'The Cinematic Orchestra',
2185 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2186 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2187 'availability': 'public',
2188 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2189 'categories': ['Music'],
2190 'playable_in_embed': True,
7666b936 2191 'channel_follower_count': int,
2192 'uploader': 'The Cinematic Orchestra',
2193 'comment_count': int,
cc2db878 2194 },
2195 'params': {
2196 'skip_download': True,
2197 },
545cc85d 2198 },
bc2ca1bb 2199 {
2200 # controversial video, only works with bpctr when authenticated with cookies
2201 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2202 'only_matching': True,
2203 },
a1a7907b 2204 {
2205 # controversial video, requires bpctr/contentCheckOk
2206 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2207 'info_dict': {
2208 'id': 'SZJvDhaSDnc',
2209 'ext': 'mp4',
2210 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2211 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
a1a7907b 2212 'upload_date': '20140716',
976ae3ea 2213 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2214 'duration': 170,
2215 'categories': ['News & Politics'],
976ae3ea 2216 'view_count': int,
2217 'channel': 'CBS Mornings',
2218 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2219 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2220 'age_limit': 18,
2221 'availability': 'needs_auth',
2222 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2223 'like_count': int,
2224 'live_status': 'not_live',
2225 'playable_in_embed': True,
7666b936 2226 'channel_follower_count': int,
2227 'uploader': 'CBS Mornings',
2228 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2229 'uploader_id': '@CBSMornings',
a1a7907b 2230 }
2231 },
f7ad7160 2232 {
2233 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2234 'url': 'cBvYw8_A0vQ',
2235 'info_dict': {
2236 'id': 'cBvYw8_A0vQ',
2237 'ext': 'mp4',
2238 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2239 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2240 'upload_date': '20201120',
976ae3ea 2241 'duration': 1456,
2242 'categories': ['Travel & Events'],
2243 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2244 'view_count': int,
2245 'channel': 'Walk around Japan',
2246 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2247 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2248 'age_limit': 0,
2249 'availability': 'public',
2250 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2251 'live_status': 'not_live',
2252 'playable_in_embed': True,
7666b936 2253 'channel_follower_count': int,
2254 'uploader': 'Walk around Japan',
2255 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2256 'uploader_id': '@walkaroundjapan7124',
f7ad7160 2257 },
2258 'params': {
2259 'skip_download': True,
2260 },
0fb983f6 2261 }, {
2262 # Has multiple audio streams
2263 'url': 'WaOKSUlf4TM',
2264 'only_matching': True
9297939e 2265 }, {
2266 # Requires Premium: has format 141 when requested using YTM url
2267 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2268 'only_matching': True
2269 }, {
120916da 2270 # multiple subtitles with same lang_code
2271 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2272 'only_matching': True,
109dd3b2 2273 }, {
2274 # Force use android client fallback
2275 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2276 'info_dict': {
2277 'id': 'YOelRv7fMxY',
11f9be09 2278 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2279 'ext': '3gp',
2280 'upload_date': '20210624',
2281 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
109dd3b2 2282 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2283 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2284 'duration': 596,
2285 'categories': ['Entertainment'],
976ae3ea 2286 'view_count': int,
2287 'channel': 'colinfurze',
2288 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2289 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2290 'age_limit': 0,
2291 'availability': 'public',
2292 'like_count': int,
2293 'live_status': 'not_live',
2294 'playable_in_embed': True,
d5d1df8a 2295 'channel_follower_count': int,
2296 'chapters': list,
7666b936 2297 'uploader': 'colinfurze',
2298 'uploader_url': 'https://www.youtube.com/@colinfurze',
2299 'uploader_id': '@colinfurze',
109dd3b2 2300 },
2301 'params': {
2302 'format': '17', # 3gp format available on android
2303 'extractor_args': {'youtube': {'player_client': ['android']}},
2304 },
120916da 2305 },
109dd3b2 2306 {
2307 # Skip download of additional client configs (remix client config in this case)
2308 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2309 'only_matching': True,
2310 'params': {
2311 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2312 },
8fc54b12 2313 }, {
2314 # shorts
2315 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2316 'only_matching': True,
9222c381 2317 }, {
2318 'note': 'Storyboards',
2319 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2320 'info_dict': {
2321 'id': '5KLPxDtMqe8',
2322 'ext': 'mhtml',
2323 'format_id': 'sb0',
2324 'title': 'Your Brain is Plastic',
9222c381 2325 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2326 'upload_date': '20140324',
976ae3ea 2327 'like_count': int,
2328 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2329 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2330 'view_count': int,
2331 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2332 'playable_in_embed': True,
2333 'tags': 'count:12',
976ae3ea 2334 'availability': 'public',
2335 'channel': 'SciShow',
2336 'live_status': 'not_live',
2337 'duration': 248,
2338 'categories': ['Education'],
2339 'age_limit': 0,
d5d1df8a 2340 'channel_follower_count': int,
2341 'chapters': list,
7666b936 2342 'uploader': 'SciShow',
2343 'uploader_url': 'https://www.youtube.com/@SciShow',
2344 'uploader_id': '@SciShow',
9222c381 2345 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2346 }, {
2347 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2348 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2349 'info_dict': {
2350 'id': '2NUZ8W2llS4',
2351 'ext': 'mp4',
2352 'title': 'The NP that test your phone performance 🙂',
2353 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
992f9a73 2354 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2355 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2356 'duration': 21,
2357 'view_count': int,
2358 'age_limit': 0,
2359 'categories': ['Gaming'],
2360 'tags': 'count:23',
2361 'playable_in_embed': True,
2362 'live_status': 'not_live',
2363 'upload_date': '20220103',
2364 'like_count': int,
2365 'availability': 'public',
2366 'channel': 'Leon Nguyen',
2367 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2368 'comment_count': int,
7666b936 2369 'channel_follower_count': int,
2370 'uploader': 'Leon Nguyen',
2371 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2372 'uploader_id': '@LeonNguyen',
992f9a73 2373 }
1ff88b7a 2374 }, {
2375 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2376 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2377 'info_dict': {
2378 'id': '2NUZ8W2llS4',
2379 'ext': 'mp4',
2380 'title': 'The NP that test your phone performance 🙂',
2381 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
1ff88b7a 2382 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2383 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2384 'duration': 21,
2385 'view_count': int,
2386 'age_limit': 0,
2387 'categories': ['Gaming'],
2388 'tags': 'count:23',
2389 'playable_in_embed': True,
2390 'live_status': 'not_live',
2391 'upload_date': '20220102',
2392 'like_count': int,
2393 'availability': 'public',
2394 'channel': 'Leon Nguyen',
2395 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2396 'comment_count': int,
7666b936 2397 'channel_follower_count': int,
2398 'uploader': 'Leon Nguyen',
2399 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2400 'uploader_id': '@LeonNguyen',
1ff88b7a 2401 },
2402 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2403 }, {
2404 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2405 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2406 'info_dict': {
2407 'id': 'mzZzzBU6lrM',
2408 'ext': 'mp4',
2409 'title': 'I Met GeorgeNotFound In Real Life...',
7666b936 2410 'description': 'md5:978296ec9783a031738b684d4ebf302d',
992f9a73 2411 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2412 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2413 'duration': 955,
2414 'view_count': int,
2415 'age_limit': 0,
2416 'categories': ['Entertainment'],
2417 'tags': 'count:26',
2418 'playable_in_embed': True,
2419 'live_status': 'not_live',
2420 'release_timestamp': 1641172509,
2421 'release_date': '20220103',
2422 'upload_date': '20220103',
2423 'like_count': int,
2424 'availability': 'public',
2425 'channel': 'Quackity',
2426 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
7666b936 2427 'channel_follower_count': int,
2428 'uploader': 'Quackity',
2429 'uploader_id': '@Quackity',
2430 'uploader_url': 'https://www.youtube.com/@Quackity',
992f9a73 2431 }
2432 },
2433 { # continuous livestream. Microformat upload date should be preferred.
2434 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2435 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2436 'info_dict': {
2437 'id': 'kgx4WGK0oNU',
2438 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2439 'ext': 'mp4',
2440 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2441 'availability': 'public',
2442 'age_limit': 0,
2443 'release_timestamp': 1637975704,
2444 'upload_date': '20210619',
2445 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2446 'live_status': 'is_live',
2447 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
992f9a73 2448 'channel': 'Abao in Tokyo',
2449 'channel_follower_count': int,
2450 'release_date': '20211127',
2451 'tags': 'count:39',
2452 'categories': ['People & Blogs'],
2453 'like_count': int,
992f9a73 2454 'view_count': int,
2455 'playable_in_embed': True,
2456 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2457 'concurrent_view_count': int,
7666b936 2458 'uploader': 'Abao in Tokyo',
2459 'uploader_url': 'https://www.youtube.com/@abaointokyo',
2460 'uploader_id': '@abaointokyo',
992f9a73 2461 },
2462 'params': {'skip_download': True}
6e634cbe 2463 }, {
2464 # Story. Requires specific player params to work.
ee27297f 2465 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2466 'info_dict': {
ee27297f 2467 'id': 'vv8qTUWmulI',
6e634cbe 2468 'ext': 'mp4',
ee27297f 2469 'availability': 'unlisted',
2470 'view_count': int,
2471 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2472 'upload_date': '20220526',
2473 'categories': ['Education'],
2474 'title': 'Story',
2475 'channel': 'IT\'S HISTORY',
2476 'description': '',
ee27297f 2477 'duration': 12,
6e634cbe 2478 'playable_in_embed': True,
6e634cbe 2479 'age_limit': 0,
6e634cbe 2480 'live_status': 'not_live',
ee27297f 2481 'tags': [],
2482 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
ee27297f 2483 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2484 },
2485 'skip': 'stories get removed after some period of time',
ee27297f 2486 }, {
2487 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2488 'info_dict': {
2489 'id': 'tjjjtzRLHvA',
2490 'ext': 'mp4',
2491 'title': 'ハッシュタグ無し };if window.ytcsi',
2492 'upload_date': '20220323',
2493 'like_count': int,
2494 'availability': 'unlisted',
7666b936 2495 'channel': 'Lesmiscore',
2496 'thumbnail': r're:^https?://.*\.jpg',
ee27297f 2497 'age_limit': 0,
ee27297f 2498 'categories': ['Music'],
6e634cbe 2499 'view_count': int,
2500 'description': '',
ee27297f 2501 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2502 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2503 'live_status': 'not_live',
2504 'playable_in_embed': True,
2505 'channel_follower_count': int,
2506 'duration': 6,
2507 'tags': [],
7666b936 2508 'uploader_id': '@lesmiscore',
2509 'uploader': 'Lesmiscore',
2510 'uploader_url': 'https://www.youtube.com/@lesmiscore',
6e634cbe 2511 }
c26f9b99 2512 }, {
2513 # Prefer primary title+description language metadata by default
2514 # Do not prefer translated description if primary is empty
2515 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2516 'info_dict': {
2517 'id': 'el3E4MbxRqQ',
2518 'ext': 'mp4',
2519 'title': 'dlp test video 2 - primary sv no desc',
2520 'description': '',
2521 'channel': 'cole-dlp-test-acc',
2522 'tags': [],
2523 'view_count': int,
2524 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2525 'like_count': int,
2526 'playable_in_embed': True,
2527 'availability': 'unlisted',
7666b936 2528 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2529 'age_limit': 0,
2530 'duration': 5,
c26f9b99 2531 'live_status': 'not_live',
2532 'upload_date': '20220908',
2533 'categories': ['People & Blogs'],
c26f9b99 2534 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
7666b936 2535 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2536 'uploader_id': '@coletdjnz',
2537 'uploader': 'cole-dlp-test-acc',
c26f9b99 2538 },
2539 'params': {'skip_download': True}
2540 }, {
2541 # Extractor argument: prefer translated title+description
2542 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2543 'info_dict': {
2544 'id': 'gHKT4uU8Zng',
2545 'ext': 'mp4',
2546 'channel': 'cole-dlp-test-acc',
2547 'tags': [],
2548 'duration': 5,
2549 'live_status': 'not_live',
2550 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2551 'upload_date': '20220728',
c26f9b99 2552 'view_count': int,
2553 'categories': ['People & Blogs'],
7666b936 2554 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2555 'title': 'dlp test video title translated (fr)',
2556 'availability': 'public',
c26f9b99 2557 'age_limit': 0,
2558 'description': 'dlp test video description translated (fr)',
2559 'playable_in_embed': True,
2560 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
7666b936 2561 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2562 'uploader_id': '@coletdjnz',
2563 'uploader': 'cole-dlp-test-acc',
c26f9b99 2564 },
2565 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2566 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2567 }, {
2568 'note': '6 channel audio',
2569 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2570 'only_matching': True,
a4894d3e 2571 }, {
2572 'note': 'Multiple HLS formats with same itag',
2573 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2574 'info_dict': {
2575 'id': 'kX3nB4PpJko',
2576 'ext': 'mp4',
2577 'categories': ['Entertainment'],
2578 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
a4894d3e 2579 'live_status': 'not_live',
2580 'duration': 937,
2581 'channel_follower_count': int,
2582 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2583 'title': 'Last To Take Hand Off Jet, Keeps It!',
2584 'channel': 'MrBeast',
2585 'playable_in_embed': True,
2586 'view_count': int,
2587 'upload_date': '20221112',
a4894d3e 2588 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2589 'age_limit': 0,
2590 'availability': 'public',
2591 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2592 'like_count': int,
2593 'tags': [],
7666b936 2594 'uploader': 'MrBeast',
2595 'uploader_url': 'https://www.youtube.com/@MrBeast',
2596 'uploader_id': '@MrBeast',
a4894d3e 2597 },
2598 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2599 }, {
2600 'note': 'Audio formats with Dynamic Range Compression',
2601 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2602 'info_dict': {
2603 'id': 'Tq92D6wQ1mg',
7666b936 2604 'ext': 'webm',
9bb85699 2605 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2606 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2607 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2608 'channel_follower_count': int,
2609 'description': 'md5:17eccca93a786d51bc67646756894066',
2610 'upload_date': '20191228',
9bb85699 2611 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2612 'playable_in_embed': True,
2613 'like_count': int,
2614 'categories': ['Entertainment'],
2615 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2616 'age_limit': 18,
2617 'channel': 'Projekt Melody',
9bb85699 2618 'view_count': int,
2619 'availability': 'needs_auth',
2620 'comment_count': int,
2621 'live_status': 'not_live',
9bb85699 2622 'duration': 106,
7666b936 2623 'uploader': 'Projekt Melody',
2624 'uploader_id': '@ProjektMelody',
2625 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
9bb85699 2626 },
2627 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2628 },
2629 {
2630 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2631 'info_dict': {
2632 'id': 'qVv6vCqciTM',
2633 'ext': 'mp4',
2634 'age_limit': 0,
dad2210c 2635 'comment_count': int,
2636 'chapters': 'count:13',
2637 'upload_date': '20221223',
2638 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2639 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
dad2210c 2640 'like_count': int,
2641 'release_date': '20221223',
2642 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2643 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2644 'view_count': int,
2645 'playable_in_embed': True,
2646 'duration': 4438,
2647 'availability': 'public',
2648 'channel_follower_count': int,
2649 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2650 'categories': ['Entertainment'],
2651 'live_status': 'was_live',
2652 'release_timestamp': 1671793345,
2653 'channel': 'さなちゃんねる',
2654 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2655 'uploader': 'さなちゃんねる',
7666b936 2656 'uploader_url': 'https://www.youtube.com/@sana_natori',
2657 'uploader_id': '@sana_natori',
2658 },
2659 },
2660 {
2661 # Fallbacks when webpage and web client is unavailable
2662 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2663 'info_dict': {
2664 'id': 'wSSmNUl9Snw',
2665 'ext': 'mp4',
2666 # 'categories': ['Science & Technology'],
2667 'view_count': int,
2668 'chapters': 'count:2',
2669 'channel': 'Scott Manley',
2670 'like_count': int,
2671 'age_limit': 0,
2672 # 'availability': 'public',
2673 'channel_follower_count': int,
2674 'live_status': 'not_live',
2675 'upload_date': '20170831',
2676 'duration': 682,
2677 'tags': 'count:8',
2678 'uploader_url': 'https://www.youtube.com/@scottmanley',
2679 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2680 'uploader': 'Scott Manley',
2681 'uploader_id': '@scottmanley',
2682 'title': 'The Computer Hack That Saved Apollo 14',
2683 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2684 'thumbnail': r're:^https?://.*\.webp',
2685 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2686 'playable_in_embed': True,
2687 },
2688 'params': {
2689 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
dad2210c 2690 },
2691 },
2eb88d95
PH
2692 ]
2693
f2e8dbcc 2694 _WEBPAGE_TESTS = [
2695 # YouTube <object> embed
2696 {
2697 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2698 'md5': '873c81d308b979f0e23ee7e620b312a3',
2699 'info_dict': {
2700 'id': 'msN87y-iEx0',
2701 'ext': 'mp4',
2702 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2703 'upload_date': '20080526',
2704 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
f2e8dbcc 2705 'age_limit': 0,
2706 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2707 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2708 'playable_in_embed': True,
2709 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2710 'like_count': int,
2711 'comment_count': int,
2712 'channel': 'Christopher Sykes',
2713 'live_status': 'not_live',
2714 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2715 'availability': 'public',
2716 'duration': 195,
2717 'view_count': int,
2718 'categories': ['Science & Technology'],
2719 'channel_follower_count': int,
7666b936 2720 'uploader': 'Christopher Sykes',
2721 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2722 'uploader_id': '@ChristopherSykesDocumentaries',
f2e8dbcc 2723 },
2724 'params': {
2725 'skip_download': True,
2726 }
2727 },
2728 ]
2729
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty `list` query value, else defer to the parent class"""
    # NOTE(review): the function-scope import looks deliberate (it keeps this
    # method self-contained) - confirm before hoisting it to module level.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
201c1459 2738
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with empty per-instance caches"""
    super().__init__(*args, **kwargs)
    # _code_cache: downloaded player JS keyed by player id (see _load_player)
    # _player_cache: memoised results/errors keyed by a cache-id tuple (see _cached)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2743
4d37720a 2744 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
adbc4ec4 2745 lock = threading.Lock()
185bf310 2746 start_time = time.time()
adbc4ec4
THD
2747 formats = [f for f in formats if f.get('is_from_start')]
2748
185bf310 2749 def refetch_manifest(format_id, delay):
2750 nonlocal formats, start_time, is_live
2751 if time.time() <= start_time + delay:
adbc4ec4
THD
2752 return
2753
2754 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
6839ae1f 2755 video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
adbc4ec4
THD
2756 microformats = traverse_obj(
2757 prs, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 2758 expected_type=dict)
4d37720a
L
2759 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2760 is_live = live_status == 'is_live'
185bf310 2761 start_time = time.time()
adbc4ec4 2762
185bf310 2763 def mpd_feed(format_id, delay):
adbc4ec4
THD
2764 """
2765 @returns (manifest_url, manifest_stream_number, is_live) or None
2766 """
253ac4ba 2767 for retry in self.RetryManager(fatal=False):
2768 with lock:
2769 refetch_manifest(format_id, delay)
2770
2771 f = next((f for f in formats if f['format_id'] == format_id), None)
2772 if not f:
2773 if not is_live:
2774 retry.error = f'{video_id}: Video is no longer live'
2775 else:
2776 retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
2777 continue
2778 return f['manifest_url'], f['manifest_stream_number'], is_live
2779 return None
adbc4ec4
THD
2780
2781 for f in formats:
4d37720a
L
2782 f['is_live'] = is_live
2783 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
2784 live_start_time, mpd_feed, not is_live and f.copy())
2785 if is_live:
2786 f['fragments'] = gen
2787 f['protocol'] = 'http_dash_segments_generator'
2788 else:
2789 f['fragments'] = LazyList(gen({}))
2790 del f['is_from_start']
adbc4ec4 2791
4d37720a 2792 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
adbc4ec4
THD
2793 FETCH_SPAN, MAX_DURATION = 5, 432000
2794
2795 mpd_url, stream_number, is_live = None, None, True
2796
2797 begin_index = 0
2798 download_start_time = ctx.get('start') or time.time()
2799
2800 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2801 if lack_early_segments:
2802 self.report_warning(bug_reports_message(
2803 'Starting download from the last 120 hours of the live stream since '
2804 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2805 lack_early_segments = True
2806
2807 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2808 fragments, fragment_base_url = None, None
2809
a539f065 2810 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2811 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2812 # Obtain from MPD's maximum seq value
2813 old_mpd_url = mpd_url
185bf310 2814 last_error = ctx.pop('last_error', None)
14f25df2 2815 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2816 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2817 or (mpd_url, stream_number, False))
2818 if not refresh_sequence:
2819 if expire_fast and not is_live:
2820 return False, last_seq
2821 elif old_mpd_url == mpd_url:
2822 return True, last_seq
4d37720a
L
2823 if manifestless_orig_fmt:
2824 fmt_info = manifestless_orig_fmt
2825 else:
2826 try:
2827 fmts, _ = self._extract_mpd_formats_and_subtitles(
2828 mpd_url, None, note=False, errnote=False, fatal=False)
2829 except ExtractorError:
2830 fmts = None
2831 if not fmts:
2832 no_fragment_score += 2
2833 return False, last_seq
2834 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
adbc4ec4
THD
2835 fragments = fmt_info['fragments']
2836 fragment_base_url = fmt_info['fragment_base_url']
2837 assert fragment_base_url
2838
2839 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2840 return True, _last_seq
2841
4d37720a 2842 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
adbc4ec4
THD
2843 while is_live:
2844 fetch_time = time.time()
2845 if no_fragment_score > 30:
2846 return
2847 if last_segment_url:
2848 # Obtain from "X-Head-Seqnum" header value from each segment
2849 try:
2850 urlh = self._request_webpage(
2851 last_segment_url, None, note=False, errnote=False, fatal=False)
2852 except ExtractorError:
2853 urlh = None
2854 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2855 if last_seq is None:
a539f065 2856 no_fragment_score += 2
adbc4ec4
THD
2857 last_segment_url = None
2858 continue
2859 else:
a539f065
LNO
2860 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2861 no_fragment_score += 2
185bf310 2862 if not should_continue:
adbc4ec4
THD
2863 continue
2864
2865 if known_idx > last_seq:
2866 last_segment_url = None
2867 continue
2868
2869 last_seq += 1
2870
2871 if begin_index < 0 and known_idx < 0:
2872 # skip from the start when it's negative value
2873 known_idx = last_seq + begin_index
2874 if lack_early_segments:
2875 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2876 try:
2877 for idx in range(known_idx, last_seq):
2878 # do not update sequence here or you'll get skipped some part of it
a539f065 2879 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2880 if not should_continue:
adbc4ec4
THD
2881 known_idx = idx - 1
2882 raise ExtractorError('breaking out of outer loop')
2883 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2884 yield {
2885 'url': last_segment_url,
36195c44 2886 'fragment_count': last_seq,
adbc4ec4
THD
2887 }
2888 if known_idx == last_seq:
2889 no_fragment_score += 5
2890 else:
2891 no_fragment_score = 0
2892 known_idx = last_seq
2893 except ExtractorError:
2894 continue
2895
4d37720a
L
2896 if manifestless_orig_fmt:
2897 # Stop at the first iteration if running for post-live manifestless;
2898 # fragment count no longer increase since it starts
2899 break
2900
adbc4ec4
THD
2901 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2902
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfg dicts, or None.

    `webpage` is accepted but not used here.
    """
    direct_path = (..., 'PLAYER_JS_URL')
    context_config_path = (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl')
    player_url = traverse_obj(
        ytcfgs, direct_path, context_config_path, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', player_url) if player_url else None
109dd3b2 2910
b6de707d 2911 def _download_player_url(self, video_id, fatal=False):
2912 res = self._download_webpage(
2913 'https://www.youtube.com/iframe_api',
2914 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2915 if res:
2916 player_version = self._search_regex(
2917 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2918 if player_version:
2919 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2920
60064c53
PH
2921 def _signature_cache_id(self, example_sig):
2922 """ Return a string representation of a signature """
14f25df2 2923 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2924
e40c758c
S
2925 @classmethod
2926 def _extract_player_info(cls, player_url):
2927 for player_re in cls._PLAYER_INFO_RE:
2928 id_m = re.search(player_re, player_url)
2929 if id_m:
2930 break
2931 else:
c081b35c 2932 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2933 return id_m.group('id')
e40c758c 2934
404f611f 2935 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2936 player_id = self._extract_player_info(player_url)
2937 if player_id not in self._code_cache:
1276a43a 2938 code = self._download_webpage(
109dd3b2 2939 player_url, video_id, fatal=fatal,
2940 note='Downloading player ' + player_id,
2941 errnote='Download of %s failed' % player_url)
1276a43a 2942 if code:
2943 self._code_cache[player_id] = code
404f611f 2944 return self._code_cache.get(player_id)
109dd3b2 2945
e40c758c 2946 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2947 player_id = self._extract_player_info(player_url)
e0df6211 2948
c4417ddb 2949 # Read from filesystem cache
86e5f3ed 2950 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2951 assert os.path.basename(func_id) == func_id
a0e07d31 2952
ae61d108 2953 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2954 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2955
580ce007 2956 if not cache_spec:
2957 code = self._load_player(video_id, player_url)
404f611f 2958 if code:
109dd3b2 2959 res = self._parse_sig_js(code)
ac668111 2960 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2961 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2962 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2963
2964 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2965
60064c53 2966 def _print_sig_code(self, func, example_sig):
404f611f 2967 if not self.get_param('youtube_print_sig_code'):
2968 return
2969
edf3e38e
PH
2970 def gen_sig_code(idxs):
2971 def _genslice(start, end, step):
78caa52a 2972 starts = '' if start == 0 else str(start)
8bcc8756 2973 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2974 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2975 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2976
2977 step = None
7af808a5
PH
2978 # Quelch pyflakes warnings - start will be set when step is set
2979 start = '(Never used)'
edf3e38e
PH
2980 for i, prev in zip(idxs[1:], idxs[:-1]):
2981 if step is not None:
2982 if i - prev == step:
2983 continue
2984 yield _genslice(start, prev, step)
2985 step = None
2986 continue
2987 if i - prev in [-1, 1]:
2988 step = i - prev
2989 start = prev
2990 continue
2991 else:
78caa52a 2992 yield 's[%d]' % prev
edf3e38e 2993 if step is None:
78caa52a 2994 yield 's[%d]' % i
edf3e38e
PH
2995 else:
2996 yield _genslice(start, i, step)
2997
ac668111 2998 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2999 cache_res = func(test_string)
edf3e38e 3000 cache_spec = [ord(c) for c in cache_res]
78caa52a 3001 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 3002 signature_id_tuple = '(%s)' % (
14f25df2 3003 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 3004 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 3005 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 3006 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 3007
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it.

    Returns a callable taking the signature string and returning whatever
    the interpreted JS function returns for it.
    """
    # Tried in order by _search_regex; every pattern captures the name as `sig`
    signature_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        signature_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
3028
580ce007 3029 def _cached(self, func, *cache_id):
3030 def inner(*args, **kwargs):
3031 if cache_id not in self._player_cache:
3032 try:
3033 self._player_cache[cache_id] = func(*args, **kwargs)
3034 except ExtractorError as e:
3035 self._player_cache[cache_id] = e
3036 except Exception as e:
3037 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3038
3039 ret = self._player_cache[cache_id]
3040 if isinstance(ret, Exception):
3041 raise ret
3042 return ret
3043 return inner
3044
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature cache id
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
404f611f 3052
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The interpreted function is cached per player URL
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = build_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed: fall back to PhantomJS when it can be
        # set up, otherwise re-raise the interpreter error
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
3086
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function referenced in the player JS.

    The call site may reference the function directly, or as an element of an
    array (`name[idx]`); in the latter case the array literal is looked up in
    `jscode`, parsed as JSON and indexed.
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(idx)]
3097
def _extract_n_function_code(self, video_id, player_url):
    """Return `(jsi, player_id, func_code)` for the player's n-parameter function.

    `func_code` is first looked up in the 'youtube-nsig' cache under the player
    id. On a cache miss the player JS is loaded, the function name is located
    and its code is extracted (by regex, falling back to the JS interpreter),
    then stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which would
    # otherwise be interpreted as a regex anchor and make this shortcut never match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Same shape as extract_function_code's result is unpacked elsewhere:
        # (list of argument names, function body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3123
def _extract_n_function_from_code(self, jsi, func_code):
    """Build a Python callable that runs the n-parameter function in `jsi`.

    The callable raises JSInterpreter.Exception when interpretation fails for
    any reason, or when the JS function returns a value starting with
    'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as e:
            # Normalize every other failure to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
e0df6211 3140
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when available, otherwise it is
    searched for in the player JS. Without a player_url this raises
    ExtractorError if `fatal`, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
3164
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the player responses.

    The first URL is requested to mark the video as watched, the second to mark
    it as fully watched. Stops with a warning as soon as one of the tracking
    URLs is missing. Download failures are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        tracking_url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3205
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by a third-party webpage.

    A page advertising a youtube.com watch URL as its alternate link yields
    only that URL and then raises cls.StopExtraction. Otherwise the results of
    the parent implementation are yielded, followed by lazyYT embeds and
    "YouTube Video Importer" players found in the page.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for id_ in lazyyt_ids:
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    importer_players = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_players:
        # The video id is the last of the three captured groups
        plugin_video_id = groups[-1]
        yield cls.url_result(plugin_video_id, cls, plugin_video_id)
66c9fa36 3229
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`, raising ExtractorError if none is found."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3236
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar renderer found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=start_of,
        title_function=title_of,
        duration=duration)
3251
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro markers lists of the engagement panels.

    Returns the chapters of the first markers list that produces any, or an
    empty list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3264
5caf30db
A
def _extract_heatmap_from_player_overlay(self, data):
    """Return the first non-empty list of heat markers in the player overlay, else None.

    Each marker is mapped to a dict with `start_time`, `end_time` (both the
    millisecond values divided by 1000) and `value`.
    """
    content_list = traverse_obj(data, (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
        'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))

    marker_fields = {
        'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
        'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
    }
    for contents in content_list:
        heatmap = traverse_obj(contents, (..., 'heatMarkerRenderer', marker_fields))
        if heatmap:
            return heatmap
    return None
3275
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment info dict.

    Returns None when the renderer has no commentId. `parent` is the id of the
    parent comment; top-level comments get the parent 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = timestamp

    author_url_path = ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')))
    relative_author_url = traverse_obj(
        comment_renderer, author_url_path, expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', relative_author_url)

    # The fields below are only set when the renderer carries the matching data
    author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if author_is_uploader is not None:
        info['author_is_uploader'] = author_is_uploader

    action_buttons = traverse_obj(
        comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    badge_icon_type = traverse_obj(
        comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
    if badge_icon_type is not None:
        info['author_is_verified'] = badge_icon_type in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
a1c5d2ca 3325
46383212 3326 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
# Generator yielding comment dicts (as built by _extract_comment) for a comment
# section or, when `parent` is given, for the reply thread of that comment.
# `root_continuation_data` is passed to _extract_continuation to find the first
# continuation. `tracker` holds the counters and the seen/pinned id sets shared
# with the recursive calls made for reply threads; it is created here when not given.
# Raises self.CommentsDisabled when root_continuation_data carries a
# messageRenderer text, no parent is set and no comment has been counted.
3327
3328 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2d6659b9 3329
3330 def extract_header(contents):
# Returns the continuation of the sort menu item selected by the comment_sort
# extractor argument (None if not found) and records the expected comment
# count in tracker['est_total'] when one is available.
2d6659b9 3331 _continuation = None
3332 for content in contents:
46383212 3333 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
f0d785d3 3334 expected_comment_count = self._get_count(
3335 comments_header_renderer, 'countText', 'commentsCount')
fe93e2c4 3336
18f8fba7 3337 if expected_comment_count is not None:
46383212 3338 tracker['est_total'] = expected_comment_count
3339 self.to_screen(f'Downloading ~{expected_comment_count} comments')
3340 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2d6659b9 3341
3342 sort_menu_item = try_get(
3343 comments_header_renderer,
3344 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
3345 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
3346
3347 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
3348 if not _continuation:
3349 continue
3350
46383212 3351 sort_text = str_or_none(sort_menu_item.get('title'))
3352 if not sort_text:
2d6659b9 3353 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
46383212 3354 self.to_screen('Sorting comments by %s' % sort_text.lower())
2d6659b9 3355 break
a2160aa4 3356 return _continuation
a1c5d2ca 3357
2d6659b9 3358 def extract_thread(contents):
# Yields the comments of one page of items, each followed by its replies.
# A bare `yield` (None) is the stop signal: the page loop below returns as
# soon as it receives a falsy entry.
a1c5d2ca 3359 if not parent:
46383212 3360 tracker['current_page_thread'] = 0
a1c5d2ca 3361 for content in contents:
46383212 3362 if not parent and tracker['total_parent_comments'] >= max_parents:
3363 yield
a1c5d2ca 3364 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
46383212 3365 comment_renderer = get_first(
3366 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
3367 expected_type=dict, default={})
a1c5d2ca 3368
a1c5d2ca
M
3369 comment = self._extract_comment(comment_renderer, parent)
3370 if not comment:
3371 continue
141a8dff 3372 comment_id = comment['id']
c35448b7 3373 if comment.get('is_pinned'):
141a8dff 3374 tracker['pinned_comment_ids'].add(comment_id)
7f51861b 3375 # Sometimes YouTube may break and give us infinite looping comments.
3376 # See: https://github.com/yt-dlp/yt-dlp/issues/6290
141a8dff 3377 if comment_id in tracker['seen_comment_ids']:
c35448b7 3378 if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
141a8dff 3379 # Pinned comments may appear a second time in newest first sort
3380 # See: https://github.com/yt-dlp/yt-dlp/issues/6712
3381 continue
7f51861b 3382 self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
3383 yield
3384 else:
3385 tracker['seen_comment_ids'].add(comment['id'])
46383212 3386
3387 tracker['running_total'] += 1
3388 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
a1c5d2ca 3389 yield comment
46383212 3390
a1c5d2ca
M
3391 # Attempt to get the replies
3392 comment_replies_renderer = try_get(
3393 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
3394
3395 if comment_replies_renderer:
46383212 3396 tracker['current_page_thread'] += 1
a1c5d2ca 3397 comment_entries_iter = self._comment_entries(
99e9e001 3398 comment_replies_renderer, ytcfg, video_id,
46383212 3399 parent=comment.get('id'), tracker=tracker)
# The replies taken from this thread are capped by both the per-thread limit
# and what is left of the overall reply limit.
86e5f3ed 3400 yield from itertools.islice(comment_entries_iter, min(
3401 max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
a1c5d2ca 3402
46383212 3403 # Keeps track of counts across recursive calls
3404 if not tracker:
3405 tracker = dict(
3406 running_total=0,
18f8fba7 3407 est_total=None,
46383212 3408 current_page_thread=0,
3409 total_parent_comments=0,
7f51861b 3410 total_reply_comments=0,
141a8dff 3411 seen_comment_ids=set(),
3412 pinned_comment_ids=set()
3413 )
46383212 3414
3415 # TODO: Deprecated
2d6659b9 3416 # YouTube comments have a max depth of 2
46383212 3417 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
3418 if max_depth:
da4db748 3419 self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
3420 'Set max replies in the max-comments extractor argument instead')
2d6659b9 3421 if max_depth == 1 and parent:
3422 return
a1c5d2ca 3423
# The first four values of the max_comments extractor argument are read here;
# the argument list is padded with '' and int_or_none is given
# default=sys.maxsize for each value.
46383212 3424 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
3425 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
2d6659b9 3426
46383212 3427 continuation = self._extract_continuation(root_continuation_data)
aae16f6e 3428
46383212 3429 response = None
6e634cbe 3430 is_forced_continuation = False
2d6659b9 3431 is_first_continuation = parent is None
6e634cbe 3432 if is_first_continuation and not continuation:
3433 # Sometimes you can get comments by generating the continuation yourself,
3434 # even if YouTube initially reports them being disabled - e.g. stories comments.
3435 # Note: if the comment section is actually disabled, YouTube may return a response with
3436 # required check_get_keys missing. So we will disable that check initially in this case.
3437 continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
3438 is_forced_continuation = True
a1c5d2ca 3439
18f8fba7 3440 continuation_items_path = (
3441 'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
a1c5d2ca
M
# One API request per iteration; the loop ends when no continuation is left.
3442 for page_num in itertools.count(0):
3443 if not continuation:
3444 break
46383212 3445 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
18f8fba7 3446 comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
2d6659b9 3447 if page_num == 0:
3448 if is_first_continuation:
3449 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 3450 else:
2d6659b9 3451 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
46383212 3452 tracker['current_page_thread'], comment_prog_str)
2d6659b9 3453 else:
3454 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
3455 ' ' if parent else '', ' replies' if parent else '',
3456 page_num, comment_prog_str)
18f8fba7 3457
3458 # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
3459 # Ignore check if YouTube says the comment count is 0.
3460 check_get_keys = None
3461 if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
3462 check_get_keys = [[*continuation_items_path, ..., (
3463 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
e72e48c5
M
3464 try:
3465 response = self._extract_response(
3466 item_id=None, query=continuation,
3467 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
18f8fba7 3468 check_get_keys=check_get_keys)
e72e48c5
M
3469 except ExtractorError as e:
3470 # Ignore incomplete data error for replies if retries didn't work.
3471 # This is to allow any other parent comments and comment threads to be downloaded.
3472 # See: https://github.com/yt-dlp/yt-dlp/issues/4669
3473 if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
3474 self.report_warning(
3475 'Received incomplete data for a comment reply thread and retrying did not help. '
3476 'Ignoring to let other comments be downloaded.')
3477 else:
3478 raise
6e634cbe 3479 is_forced_continuation = False
2d6659b9 3480 continuation = None
18f8fba7 3481 for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
# The first response of a top-level call is only used for its header (the
# sort continuation); comments are read from the following responses.
46383212 3482 if is_first_continuation:
3483 continuation = extract_header(continuation_items)
3484 is_first_continuation = False
2d6659b9 3485 if continuation:
a1c5d2ca 3486 break
46383212 3487 continue
a1c5d2ca 3488
46383212 3489 for entry in extract_thread(continuation_items):
3490 if not entry:
3491 return
3492 yield entry
3493 continuation = self._extract_continuation({'contents': continuation_items})
3494 if continuation:
2d6659b9 3495 break
a1c5d2ca 3496
6e634cbe 3497 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
3498 if message and not parent and tracker['running_total'] == 0:
3499 self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
0cf643b2 3500 raise self.CommentsDisabled
6e634cbe 3501
3502 @staticmethod
3503 def _generate_comment_continuation(video_id):
3504 """
3505 Generates initial comment section continuation token from given video id
3506 """
3507 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3508 return base64.b64encode(token.encode()).decode()
3509
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    # First value of the max_comments extractor argument; None means no cap
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])

    def _real_comment_extract(contents):
        # Pick the first item section identified as the comment section
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
a1c5d2ca 3520
109dd3b2 3521 @staticmethod
99e9e001 3522 def _get_checkok_params():
3523 return {'contentCheckOk': True, 'racyCheckOk': True}
3524
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player query.

    `sts` (signature timestamp) is included only when it is not None. The
    check-ok parameters are merged into the top level of the result.
    """
    content_playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        content_playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': content_playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3538
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of `player_response` indicates an age gate.

    True when `desktopLegacyAgeGateReason` is set, or when the status or the
    reason text contains one of the known age-gate markers. The comparison is
    case-insensitive, since the status markers are listed in lower case while
    status values are compared in upper case elsewhere (e.g. 'UNPLAYABLE').
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Only string values can be searched for a marker; anything else is ignored
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason'), {str}))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    lowered_reasons = [reason.lower() for reason in reasons]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered_reasons)
3550
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of `player_response` is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3554
# Value of the `params` query field sent for stories and for the android client
_STORY_PLAYER_PARAMS = '8AEB'

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Download the player API JSON of `video_id` for the given client.

    Returns the response, or None when the response is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be looked up when a player URL is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return player_response or None
3578
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the player_client extractor argument into an ordered client list.

    'default' expands to the default clients and 'all' to every client whose
    name does not start with an underscore (highest priority first); unknown
    names are skipped with a warning. For music URLs the `_music` variant of
    each selected client is added when it exists. Duplicates are removed.
    """
    default = ['android', 'web']
    allowed_clients = [name for name in INNERTUBE_CLIENTS if name[:1] != '_']
    allowed_clients.sort(key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')

    if not requested_clients:
        requested_clients = default

    wants_music = smuggled_data.get('is_music_url') or self.is_music_url(url)
    if wants_music:
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 3602
50ac0e54 3603 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
# Collect the player responses of `video_id` from the given innertube clients.
# Returns a tuple (prs, player_url): the list of accepted player responses and
# the player URL value left after the last client was processed.
# The last ExtractorError is re-raised when no player response was collected;
# otherwise extraction errors are only reported as warnings.
11f9be09 3604 initial_pr = None
3605 if webpage:
b7c47b74 3606 initial_pr = self._search_json(
3607 self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
6b09401b 3608
ae729626 3609 all_clients = set(clients)
# Reversed so that clients.pop() below takes the clients in their given order,
# and so that a client appended by append_client is the next one to be tried.
c0bc527b 3610 clients = clients[::-1]
b6de707d 3611 prs = []
e7e94f2a 3612
ae729626 3613 def append_client(*client_names):
e7870111 3614 """ Append the first client name that exists but not already used """
ae729626 3615 for client_name in client_names:
e7870111
D
3616 actual_client = _split_innertube_client(client_name)[0]
3617 if actual_client in INNERTUBE_CLIENTS:
3618 if actual_client not in all_clients:
ae729626 3619 clients.append(client_name)
e7870111
D
3620 all_clients.add(actual_client)
3621 return
e7e94f2a 3622
379e44ed 3623 # Android player_response does not have microFormats which are needed for
3624 # extraction of some data. So we return the initial_pr with formats
3625 # stripped out even if not requested by the user
3626 # See: https://github.com/yt-dlp/yt-dlp/issues/501
379e44ed 3627 if initial_pr:
3628 pr = dict(initial_pr)
3629 pr['streamingData'] = None
b6de707d 3630 prs.append(pr)
379e44ed 3631
3632 last_error = None
b6de707d 3633 tried_iframe_fallback = False
3634 player_url = None
c0bc527b 3635 while clients:
e7870111 3636 client, base_client, variant = _split_innertube_client(clients.pop())
# The web client uses master_ytcfg; for other clients a ytcfg is downloaded
# unless 'configs' is listed in the player_skip extractor argument.
11f9be09 3637 player_ytcfg = master_ytcfg if client == 'web' else {}
a25bca9f 3638 if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
3639 player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
c0bc527b 3640
b6de707d 3641 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
3642 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
3643 if 'js' in self._configuration_arg('player_skip'):
3644 require_js_player = False
3645 player_url = None
3646
# _download_player_url is tried at most once per call, guarded by tried_iframe_fallback.
3647 if not player_url and not tried_iframe_fallback and require_js_player:
3648 player_url = self._download_player_url(video_id)
3649 tried_iframe_fallback = True
3650
379e44ed 3651 try:
3652 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
50ac0e54 3653 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
379e44ed 3654 except ExtractorError as e:
# Only the most recent error is kept; the previously kept one is downgraded to a warning.
3655 if last_error:
3656 self.report_warning(last_error)
3657 last_error = e
3658 continue
3659
11f9be09 3660 if pr:
a3e96421 3661 # YouTube may return a different video player response than expected.
3662 # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
3663 pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
3664 if pr_video_id and pr_video_id != video_id:
3665 self.report_warning(
c7dcf0b3 3666 f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
a3e96421 3667 else:
c795c39f
L
3668 # Save client name for introspection later
3669 name = short_client_name(client)
3670 sd = traverse_obj(pr, ('streamingData', {dict})) or {}
3671 sd[STREAMING_DATA_CLIENT_NAME] = name
3672 for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
3673 f[STREAMING_DATA_CLIENT_NAME] = name
a3e96421 3674 prs.append(pr)
c0bc527b 3675
e7e94f2a 3676 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
e7870111
D
3677 if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
3678 append_client(f'{base_client}_creator')
e7e94f2a 3679 elif self._is_agegated(pr):
e7870111
D
3680 if variant == 'tv_embedded':
3681 append_client(f'{base_client}_embedded')
3682 elif not variant:
3683 append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
c0bc527b 3684
379e44ed 3685 if last_error:
b6de707d 3686 if not len(prs):
379e44ed 3687 raise last_error
3688 self.report_warning(last_error)
b6de707d 3689 return prs, player_url
11f9be09 3690
4d37720a
L
3691 def _needs_live_processing(self, live_status, duration):
3692 if (live_status == 'is_live' and self.get_param('live_from_start')
3693 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3694 return live_status
3695
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Yield every format found in `streaming_data`, then the subtitles dict.

    This is a generator: it first yields one dict per direct ("https")
    format, then one per HLS/DASH manifest format, and as its LAST item
    yields the merged subtitles dict collected from the manifests.
    Callers must therefore split off the final item.

    @param streaming_data  Iterable of per-client "streamingData" dicts
    @param video_id        Video ID, used for messages and signature decryption
    @param player_url      URL of the player JS; needed to decrypt signatures
                           and the "n" parameter. May be falsy
    @param live_status     Live status string; compared against 'is_live'
    @param duration        Video duration in seconds (may be None); used to
                           detect formats that are much shorter than the video
    """
    CHUNK_SIZE = 10 << 20
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    all_formats = self._configuration_arg('include_duplicate_formats')

    def build_fragments(f):
        # Lazily split the file into CHUNK_SIZE-sized ranged requests
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be present but null, so do not rely on the .get() default
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, just marked and deprioritized
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                # `quality` is None when the format has neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            # The track id may be present but null
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            # Remember what was seen so duplicates and manifest variants can be detected later
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if all_formats and dct['filesize']:
            # Additionally expose the same stream as ranged fragments
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy',
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Returns False if the format duplicates one already seen (and duplicates
        # were not requested); otherwise fills in format_id/quality and returns True
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            # Same itag was already seen over another protocol; disambiguate
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known resolution
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose'):
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item produced by this generator
    yield subtitles
11f9be09 3920
def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per entry of the storyboard spec.

    The spec is a '|'-separated string: the first item is the base URL
    template and every following item is a '#'-separated description of
    one storyboard level.

    @param player_responses  Player responses to read the storyboard spec from
    @param duration          Video duration in seconds. When it is unknown
                             (None/0) nothing is yielded, since the per-fragment
                             durations and the fps are computed from it
    """
    # Dividing by/into a missing duration would raise and abort the whole extraction
    if not duration:
        return

    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # Clamp the final fragment to the time that is actually left
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3958
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns a 4-tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` is None when 'webpage' is listed in the `player_skip`
    extractor argument.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the built-in config when none can be read from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3975
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and collect the formats and subtitles.

    Returns a 5-tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields the subtitles dict as its very last item
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
3994
3995 def _real_extract(self, url):
3996 url, smuggled_data = unsmuggle_url(url, {})
3997 video_id = self._match_id(url)
3998
3999 base_url = self.http_scheme() + '//www.youtube.com/'
4000 webpage_url = base_url + 'watch?v=' + video_id
4001
4002 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4003
11f9be09 4004 playability_statuses = traverse_obj(
6839ae1f 4005 player_responses, (..., 'playabilityStatus'), expected_type=dict)
11f9be09 4006
4007 trailer_video_id = get_first(
4008 playability_statuses,
4009 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4010 expected_type=str)
4011 if trailer_video_id:
4012 return self.url_result(
4013 trailer_video_id, self.ie_key(), trailer_video_id)
4014
4015 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4016 if webpage else (lambda x: None))
4017
6839ae1f 4018 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
11f9be09 4019 microformats = traverse_obj(
4020 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 4021 expected_type=dict)
c26f9b99 4022
4023 translated_title = self._get_text(microformats, (..., 'title'))
4024 video_title = (self._preferred_lang and translated_title
4025 or get_first(video_details, 'title') # primary
4026 or translated_title
4027 or search_meta(['og:title', 'twitter:title', 'title']))
4028 translated_description = self._get_text(microformats, (..., 'description'))
4029 original_description = get_first(video_details, 'shortDescription')
4030 video_description = (
4031 self._preferred_lang and translated_description
4032 # If original description is blank, it will be an empty string.
4033 # Do not prefer translated description in this case.
4034 or original_description if original_description is not None else translated_description)
11f9be09 4035
d89257f3 4036 multifeed_metadata_list = get_first(
4037 player_responses,
4038 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4039 expected_type=str)
4040 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4041 if self.get_param('noplaylist'):
11f9be09 4042 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 4043 else:
4044 entries = []
4045 feed_ids = []
4046 for feed in multifeed_metadata_list.split(','):
4047 # Unquote should take place before split on comma (,) since textual
4048 # fields may contain comma as well (see
4049 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 4050 feed_data = urllib.parse.parse_qs(
ac668111 4051 urllib.parse.unquote_plus(feed))
d89257f3 4052
4053 def feed_entry(name):
4054 return try_get(
14f25df2 4055 feed_data, lambda x: x[name][0], str)
d89257f3 4056
4057 feed_id = feed_entry('id')
4058 if not feed_id:
4059 continue
4060 feed_title = feed_entry('title')
4061 title = video_title
4062 if feed_title:
4063 title += ' (%s)' % feed_title
4064 entries.append({
4065 '_type': 'url_transparent',
4066 'ie_key': 'Youtube',
4067 'url': smuggle_url(
4068 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
4069 {'force_singlefeed': True}),
4070 'title': title,
4071 })
4072 feed_ids.append(feed_id)
4073 self.to_screen(
4074 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
4075 % (', '.join(feed_ids), video_id))
4076 return self.playlist_result(
4077 entries, video_id, video_title, video_description)
11f9be09 4078
9da6612b 4079 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4080 or int_or_none(get_first(microformats, 'lengthSeconds'))
4081 or parse_duration(search_meta('duration')) or None)
a1b2d843 4082
4d37720a
L
4083 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4084 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4085 if live_status == 'post_live':
4086 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
bf1317d2 4087
545cc85d 4088 if not formats:
11f9be09 4089 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 4090 self.report_drm(video_id)
11f9be09 4091 pemr = get_first(
4092 playability_statuses,
4093 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4094 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4095 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 4096 if subreason:
545cc85d 4097 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 4098 countries = get_first(microformats, 'availableCountries')
545cc85d 4099 if not countries:
4100 regions_allowed = search_meta('regionsAllowed')
4101 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 4102 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 4103 reason += f'. {subreason}'
545cc85d 4104 if reason:
b7da73eb 4105 self.raise_no_formats(reason, expected=True)
bf1317d2 4106
11f9be09 4107 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 4108 if not keywords and webpage:
4109 keywords = [
4110 unescapeHTML(m.group('content'))
4111 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4112 for keyword in keywords:
4113 if keyword.startswith('yt:stretch='):
201c1459 4114 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4115 if mobj:
4116 # NB: float is intentional for forcing float division
4117 w, h = (float(v) for v in mobj.groups())
4118 if w > 0 and h > 0:
4119 ratio = w / h
4120 for f in formats:
4121 if f.get('vcodec') != 'none':
4122 f['stretched_ratio'] = ratio
4123 break
a709d873 4124 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 4125 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4126 if thumbnail_url:
4127 thumbnails.append({
4128 'url': thumbnail_url,
ff2751ac 4129 })
fccf5021 4130 original_thumbnails = thumbnails.copy()
4131
0ba692ac 4132 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 4133 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 4134 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 4135 thumbnail_names = [
962ffcf8 4136 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 4137 # in resolution, these are not the custom thumbnail. So de-prioritize them
4138 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4139 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 4140 ]
cca80fe6 4141 n_thumbnail_names = len(thumbnail_names)
0ba692ac 4142 thumbnails.extend({
4143 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4144 video_id=video_id, name=name, ext=ext,
4d37720a 4145 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
cca80fe6 4146 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 4147 for thumb in thumbnails:
cca80fe6 4148 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 4149 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 4150 self._remove_duplicate_formats(thumbnails)
fccf5021 4151 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 4152
7ea65411 4153 category = get_first(microformats, 'category') or search_meta('genre')
7666b936 4154 channel_id = self.ucid_or_none(str_or_none(
7ea65411 4155 get_first(video_details, 'channelId')
4156 or get_first(microformats, 'externalChannelId')
7666b936 4157 or search_meta('channelId')))
7ea65411 4158 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4159
adbc4ec4
THD
4160 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4161 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4162 if not duration and live_end_time and live_start_time:
4163 duration = live_end_time - live_start_time
4164
4d37720a
L
4165 needs_live_processing = self._needs_live_processing(live_status, duration)
4166
4167 def is_bad_format(fmt):
4168 if needs_live_processing and not fmt.get('is_from_start'):
4169 return True
4170 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4171 and fmt.get('protocol') == 'http_dash_segments'):
4172 return True
4173
4174 for fmt in filter(is_bad_format, formats):
4175 fmt['preference'] = (fmt.get('preference') or -1) - 10
4176 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
4177
4178 if needs_live_processing:
4179 self._prepare_live_from_start_formats(
4180 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
7ea65411 4181
720c3099 4182 formats.extend(self._extract_storyboard(player_responses, duration))
4183
7666b936 4184 channel_handle = self.handle_from_url(owner_profile_url)
4185
545cc85d 4186 info = {
4187 'id': video_id,
39ca3b5c 4188 'title': video_title,
545cc85d 4189 'formats': formats,
4190 'thumbnails': thumbnails,
fccf5021 4191 # The best thumbnail that we are sure exists. Prevents unnecessary
4192 # URL checking if user don't care about getting the best possible thumbnail
4193 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 4194 'description': video_description,
545cc85d 4195 'channel_id': channel_id,
7666b936 4196 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
545cc85d 4197 'duration': duration,
4198 'view_count': int_or_none(
11f9be09 4199 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 4200 or search_meta('interactionCount')),
11f9be09 4201 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 4202 'age_limit': 18 if (
11f9be09 4203 get_first(microformats, 'isFamilySafe') is False
545cc85d 4204 or search_meta('isFamilyFriendly') == 'false'
4205 or search_meta('og:restrictions:age') == '18+') else 0,
4206 'webpage_url': webpage_url,
4207 'categories': [category] if category else None,
4208 'tags': keywords,
11f9be09 4209 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4d37720a 4210 'live_status': live_status,
adbc4ec4 4211 'release_timestamp': live_start_time,
9f14daf2 4212 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
4213 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
545cc85d 4214 }
b477fc13 4215
c646d76f 4216 subtitles = {}
3944e7af 4217 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 4218 if pctr:
ecdc9049 4219 def get_lang_code(track):
4220 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4221 or track.get('languageCode'))
4222
4223 # Converted into dicts to remove duplicates
4224 captions = {
4225 get_lang_code(sub): sub
6839ae1f 4226 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
ecdc9049 4227 translation_languages = {
4228 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
6839ae1f 4229 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
ecdc9049 4230
774d79cc 4231 def process_language(container, base_url, lang_code, sub_name, query):
120916da 4232 lang_subs = container.setdefault(lang_code, [])
545cc85d 4233 for fmt in self._SUBTITLE_FORMATS:
4234 query.update({
4235 'fmt': fmt,
4236 })
4237 lang_subs.append({
4238 'ext': fmt,
60f393e4 4239 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 4240 'name': sub_name,
545cc85d 4241 })
7e72694b 4242
07b47084 4243 # NB: Constructing the full subtitle dictionary is slow
4244 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4245 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 4246 for lang_code, caption_track in captions.items():
4247 base_url = caption_track.get('baseUrl')
1235d333 4248 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 4249 if not base_url:
4250 continue
ecdc9049 4251 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 4252 if caption_track.get('kind') != 'asr':
545cc85d 4253 if not lang_code:
4254 continue
4255 process_language(
ecdc9049 4256 subtitles, base_url, lang_code, lang_name, {})
4257 if not caption_track.get('isTranslatable'):
4258 continue
3944e7af 4259 for trans_code, trans_name in translation_languages.items():
4260 if not trans_code:
545cc85d 4261 continue
1235d333 4262 orig_trans_code = trans_code
71eb82d1 4263 if caption_track.get('kind') != 'asr' and trans_code != 'und':
07b47084 4264 if not get_translated_subs:
18e49408 4265 continue
ecdc9049 4266 trans_code += f'-{lang_code}'
a70635b8 4267 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 4268 # Add an "-orig" label to the original language so that it can be distinguished.
4269 # The subs are returned without "-orig" as well for compatibility
1235d333 4270 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 4271 process_language(
d49669ac 4272 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4273 # Setting tlang=lang returns damaged subtitles.
d49669ac 4274 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 4275 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 4276
4277 info['automatic_captions'] = automatic_captions
4278 info['subtitles'] = subtitles
7e72694b 4279
14f25df2 4280 parsed_url = urllib.parse.urlparse(url)
545cc85d 4281 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 4282 query = urllib.parse.parse_qs(component)
545cc85d 4283 for k, v in query.items():
4284 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4285 d_k += '_time'
4286 if d_k not in info and k in s_ks:
4287 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
4288
4289 # Youtube Music Auto-generated description
822b9d9c 4290 if video_description:
1890fc63 4291 mobj = re.search(
4292 r'''(?xs)
4293 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
4294 (?P<album>[^\n]+)
4295 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4296 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4297 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
4298 .+\nAuto-generated\ by\ YouTube\.\s*$
4299 ''', video_description)
822b9d9c 4300 if mobj:
822b9d9c
RA
4301 release_year = mobj.group('release_year')
4302 release_date = mobj.group('release_date')
4303 if release_date:
4304 release_date = release_date.replace('-', '')
4305 if not release_year:
545cc85d 4306 release_year = release_date[:4]
4307 info.update({
4308 'album': mobj.group('album'.strip()),
4309 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4310 'track': mobj.group('track').strip(),
4311 'release_date': release_date,
cc2db878 4312 'release_year': int_or_none(release_year),
545cc85d 4313 })
7e72694b 4314
545cc85d 4315 initial_data = None
4316 if webpage:
56ba69e4 4317 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
607510b9 4318 if not traverse_obj(initial_data, 'contents'):
4319 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4320 initial_data = None
545cc85d 4321 if not initial_data:
99e9e001 4322 query = {'videoId': video_id}
4323 query.update(self._get_checkok_params())
109dd3b2 4324 initial_data = self._extract_response(
4325 item_id=video_id, ep='next', fatal=False,
607510b9 4326 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
99e9e001 4327 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4328 note='Downloading initial data API JSON')
545cc85d 4329
0df111a3 4330 info['comment_count'] = traverse_obj(initial_data, (
4331 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
071670cb 4332 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
0df111a3 4333 ), (
4334 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
071670cb
ND
4335 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
4336 ), expected_type=self._get_count, get_all=False)
0df111a3 4337
19a03940 4338 try: # This will error if there is no livechat
c60ee3a2 4339 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4340 except (KeyError, IndexError, TypeError):
4341 pass
4342 else:
ecdc9049 4343 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4344 # url is needed to set cookies
4345 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4346 'video_id': video_id,
4347 'ext': 'json',
4d37720a
L
4348 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4349 else 'youtube_live_chat_replay'),
c60ee3a2 4350 }]
545cc85d 4351
4352 if initial_data:
7c365c21 4353 info['chapters'] = (
4354 self._extract_chapters_from_json(initial_data, duration)
4355 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4356 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4357 or None)
545cc85d 4358
5caf30db
A
4359 info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
4360
17322130 4361 contents = traverse_obj(
4362 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4363 expected_type=list, default=[])
4364
4365 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4366 if vpir:
4367 stl = vpir.get('superTitleLink')
4368 if stl:
4369 stl = self._get_text(stl)
4370 if try_get(
4371 vpir,
4372 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4373 info['location'] = stl
4374 else:
affc4fef 4375 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4376 if mobj:
545cc85d 4377 info.update({
17322130 4378 'series': mobj.group(1),
4379 'season_number': int(mobj.group(2)),
4380 'episode_number': int(mobj.group(3)),
545cc85d 4381 })
17322130 4382 for tlb in (try_get(
4383 vpir,
4384 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4385 list) or []):
3ffb2f5b 4386 tbrs = variadic(
4387 traverse_obj(
6839ae1f
SS
4388 tlb, ('toggleButtonRenderer', ...),
4389 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
3ffb2f5b 4390 for tbr in tbrs:
4391 for getter, regex in [(
4392 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4393 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4394 lambda x: x['accessibility'],
4395 lambda x: x['accessibilityData']['accessibilityData'],
4396 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4397 label = (try_get(tbr, getter, dict) or {}).get('label')
4398 if label:
4399 mobj = re.match(regex, label)
4400 if mobj:
4401 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4402 break
17322130 4403 sbr_tooltip = try_get(
4404 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4405 if sbr_tooltip:
4406 like_count, dislike_count = sbr_tooltip.split(' / ')
4407 info.update({
4408 'like_count': str_to_int(like_count),
4409 'dislike_count': str_to_int(dislike_count),
4410 })
867c66ff
M
4411 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4412 if vcr:
4413 vc = self._get_count(vcr, 'viewCount')
4414 # Upcoming premieres with waiting count are treated as live here
4415 if vcr.get('isLive'):
4416 info['concurrent_view_count'] = vc
4417 elif info.get('view_count') is None:
4418 info['view_count'] = vc
4419
17322130 4420 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4421 if vsir:
4422 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4423 info.update({
4424 'channel': self._get_text(vor, 'title'),
4425 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4426
7666b936 4427 if not channel_handle:
4428 channel_handle = self.handle_from_url(
4429 traverse_obj(vor, (
4430 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4431 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4432 {str}), get_all=False))
4433
17322130 4434 rows = try_get(
4435 vsir,
4436 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4437 list) or []
4438 multiple_songs = False
4439 for row in rows:
4440 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4441 multiple_songs = True
4442 break
4443 for row in rows:
4444 mrr = row.get('metadataRowRenderer') or {}
4445 mrr_title = mrr.get('title')
4446 if not mrr_title:
4447 continue
4448 mrr_title = self._get_text(mrr, 'title')
4449 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4450 if mrr_title == 'License':
4451 info['license'] = mrr_contents_text
4452 elif not multiple_songs:
4453 if mrr_title == 'Album':
4454 info['album'] = mrr_contents_text
4455 elif mrr_title == 'Artist':
4456 info['artist'] = mrr_contents_text
4457 elif mrr_title == 'Song':
4458 info['track'] = mrr_contents_text
545cc85d 4459
7666b936 4460 info.update({
4461 'uploader': info.get('channel'),
4462 'uploader_id': channel_handle,
4463 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4464 })
17322130 4465 # The upload date for scheduled, live and past live streams / premieres in microformats
4466 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4467 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 4468 upload_date = (
4469 unified_strdate(get_first(microformats, 'uploadDate'))
4470 or unified_strdate(search_meta('uploadDate')))
1ff88b7a 4471 if not upload_date or (
4d37720a 4472 live_status in ('not_live', None)
1ff88b7a 4473 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4474 ):
c26f9b99 4475 upload_date = strftime_or_none(
4476 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
17322130 4477 info['upload_date'] = upload_date
992f9a73 4478
545cc85d 4479 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4480 v = info.get(s_k)
4481 if v:
4482 info[d_k] = v
b84071c0 4483
c26f9b99 4484 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4485
4486 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4487 or get_first(video_details, 'isPrivate', expected_type=bool))
4488
4489 info['availability'] = (
4490 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4491 else self._availability(
4492 is_private=is_private,
4493 needs_premium=(
4494 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4495 or False if initial_data and is_private is not None else None),
4496 needs_subscription=(
4497 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4498 or False if initial_data and is_private is not None else None),
4499 needs_auth=info['age_limit'] >= 18,
4500 is_unlisted=None if is_private is None else (
4501 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4502 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4503
a2160aa4 4504 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4505
11f9be09 4506 self.mark_watched(video_id, player_responses)
d77ab8e2 4507
545cc85d 4508 return info
c5e8d7af 4509
a61fd4cf 4510
a6213a49 4511class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for extraction methods with the signature `(self, url, smuggled_data)`.

    The returned wrapper takes `(self, url)`. It unsmuggles the URL, sets the
    `is_music_url` flag when `self.is_music_url(url)` says so, calls `func`, and
    finally smuggles whatever data is left back into the URL of the result and
    of each of its `entries`.
    """
    url_result_types = ('url', 'url_transparent')
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only results that point at another URL can carry smuggled data
        if info.get('_type') in url_result_types:
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in youtube_hosts:
                    # The flag is expressed through the music host instead of being smuggled
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Kept lazy; every entry gets its own copy because _smuggle may pop keys
            info_dict['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in info_dict['entries'])
        return info_dict

    return wrapper
4538
8bdd16b4 4539 @staticmethod
cd7c66cf 4540 def _extract_basic_item_renderer(item):
4541 # Modified from _extract_grid_item_renderer
201c1459 4542 known_basic_renderers = (
a17526e4 4543 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4544 )
4545 for key, renderer in item.items():
201c1459 4546 if not isinstance(renderer, dict):
cd7c66cf 4547 continue
201c1459 4548 elif key in known_basic_renderers:
4549 return renderer
4550 elif key.startswith('grid') and key.endswith('Renderer'):
4551 return renderer
8bdd16b4 4552
def _extract_channel_renderer(self, renderer):
    """
    Build a `url` result for the channel described by `renderer`.

    `renderer['channelId']` must exist (KeyError otherwise).
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    name = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
    # However we can expect them to change that in the future.
    handle_sources = (
        ('commandMetadata', 'webCommandMetadata', 'url'),
        ('browseEndpoint', 'canonicalBaseUrl'),
    )
    handle = self.handle_from_url(traverse_obj(
        renderer, ('navigationEndpoint', handle_sources, {str}), get_all=False))
    handle_url = format_field(handle, None, 'https://www.youtube.com/%s', default=None)

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': name,
        'uploader': name,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': name,
        'uploader_id': handle,
        'uploader_url': handle_url,
        'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': self._get_count(renderer, 'videoCountText'),
        'description': self._get_text(renderer, 'descriptionSnippet'),
    }
4581
def _grid_entries(self, grid_renderer):
    """
    Yield one result per usable item of `grid_renderer['items']`.

    An item is treated, in this order of precedence, as a playlist, a video,
    a channel, or a generic link handled by one of the YouTube extractors.
    """
    generic_extractors = (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)

    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next((ie for ie in generic_extractors if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 4619
def _music_reponsive_list_entry(self, renderer):
    """
    Turn a music list item renderer into a url result.

    A video id under `playlistItemData` wins; otherwise the navigation endpoint
    is tried as a watch endpoint with a playlist, then as a browse endpoint.
    Returns None when none of these ids is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        first_column_text = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=first_column_text)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4640
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in the `content` of a shelf renderer, if any."""
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            yield from self._grid_entries(grid)
        # TODO: content['horizontalListRenderer'] is not processed
8bdd16b4 4655
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url result for the shelf's own endpoint (when it has one), followed
    by the entries embedded in the shelf content.

    With `skip_channels`, a shelf whose URL contains '/channels?' yields nothing.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4671
def _playlist_entries(self, video_list_renderer):
    """
    Yield an extracted video for every item of `video_list_renderer['contents']`
    that holds a playlist(-panel) video renderer with a `videoId`.
    """
    for content in video_list_renderer['contents']:
        if not isinstance(content, dict):
            continue
        renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
        if isinstance(renderer, dict) and renderer.get('videoId'):
            yield self._extract_video(renderer)
07aeced6 4683
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one result (a video, else a playlist) for a rich item renderer."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}

    if renderer.get('videoId'):
        yield self._extract_video(renderer)
        return

    playlist_id = renderer.get('playlistId')
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
3462ffa8 4699
8bdd16b4 4700 def _video_entry(self, video_renderer):
4701 video_id = video_renderer.get('videoId')
4702 if video_id:
4703 return self._extract_video(video_renderer)
dacb3a86 4704
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the page a hashtag tile links to, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4711
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the media referenced by a community post: the attached video, the
    attached playlist, and every YouTube video linked in the post text (except
    a link to the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attachment was already yielded above
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 4747
8bdd16b4 4748 def _post_thread_continuation_entries(self, post_thread_continuation):
4749 contents = post_thread_continuation.get('contents')
4750 if not isinstance(contents, list):
4751 return
4752 for content in contents:
4753 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4754 if isinstance(renderer, dict):
4755 yield from self._post_thread_entries(renderer)
8bdd16b4 4756 continue
6b0b0a28 4757 renderer = content.get('videoRenderer')
4758 if isinstance(renderer, dict):
4759 yield self._video_entry(renderer)
07aeced6 4760
39ed931e 4761 r''' # unused
4762 def _rich_grid_entries(self, contents):
4763 for content in contents:
4764 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4765 if video_renderer:
4766 entry = self._video_entry(video_renderer)
4767 if entry:
4768 yield entry
4769 '''
52efa4b3 4770
def _report_history_entries(self, renderer):
    """Yield a YouTube url result for every link found in the text cells of a report-history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4777
# Generator over the entries found under parent_renderer['contents'].
# Besides yielding entries, it reports the continuation it found through
# continuation_list[0] (an out-parameter). Because this is a generator, the
# reset below only happens once iteration starts, and the final value is only
# meaningful after the generator has been exhausted.
a6213a49 4778 def _extract_entries(self, parent_renderer, continuation_list):
4779 # continuation_list is modified in-place with continuation_list = [continuation_token]
4780 continuation_list[:] = [None]
# A missing or non-list 'contents' is treated as empty
4781 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
4782 for content in contents:
4783 if not isinstance(content, dict):
4784 continue
# Section-like container: looked up under the three keys below, dicts only
16aa9ea4 4785 is_renderer = traverse_obj(
4786 content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
4787 expected_type=dict)
# No section container: the item is either a rich item or a report-history section;
# anything else is skipped
a6213a49 4788 if not is_renderer:
0a5095fe 4789 if content.get('richItemRenderer'):
4790 for entry in self._rich_entries(content['richItemRenderer']):
a6213a49 4791 yield entry
# For rich items the continuation is taken from the parent renderer
4792 continuation_list[0] = self._extract_continuation(parent_renderer)
0a5095fe 4793 elif content.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
4794 table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
4795 yield from self._report_history_entries(table)
4796 continuation_list[0] = self._extract_continuation(table)
a6213a49 4797 continue
0a5095fe 4798
a6213a49 4799 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
4800 for isr_content in isr_contents:
4801 if not isinstance(isr_content, dict):
8bdd16b4 4802 continue
69184e41 4803
# Dispatch table: renderer key -> callable returning an iterable of entries.
# Handlers that return a single entry are wrapped in a one-element list.
a6213a49 4804 known_renderers = {
4805 'playlistVideoListRenderer': self._playlist_entries,
4806 'gridRenderer': self._grid_entries,
a17526e4 4807 'reelShelfRenderer': self._grid_entries,
4808 'shelfRenderer': self._shelf_entries,
16aa9ea4 4809 'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
a6213a49 4810 'backstagePostThreadRenderer': self._post_thread_entries,
4811 'videoRenderer': lambda x: [self._video_entry(x)],
a61fd4cf 4812 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
4813 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
ad210f4f 4814 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
a6213a49 4815 }
# Only the first known key of the item is processed (note the break);
# falsy entries (e.g. None from the single-entry handlers) are dropped
4816 for key, renderer in isr_content.items():
4817 if key not in known_renderers:
4818 continue
4819 for entry in known_renderers[key](renderer):
4820 if entry:
4821 yield entry
4822 continuation_list[0] = self._extract_continuation(renderer)
4823 break
70d5c17b 4824
# Fallbacks when no continuation has been found so far: first the section
# container, then the parent renderer.
# NOTE(review): the loop level of these two checks cannot be told from this
# de-indented view - confirm against the properly indented file
4825 if not continuation_list[0]:
a6213a49 4826 continuation_list[0] = self._extract_continuation(is_renderer)
3462ffa8 4827
a6213a49 4828 if not continuation_list[0]:
4829 continuation_list[0] = self._extract_continuation(parent_renderer)
4830
4831 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4832 continuation_list = [None]
4833 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4834 tab_content = try_get(tab, lambda x: x['content'], dict)
4835 if not tab_content:
4836 return
3462ffa8 4837 parent_renderer = (
29f7c58a 4838 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4839 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4840 yield from extract_entries(parent_renderer)
3462ffa8 4841 continuation = continuation_list[0]
d069eca7 4842
8bdd16b4 4843 for page_num in itertools.count(1):
4844 if not continuation:
4845 break
99e9e001 4846 headers = self.generate_api_headers(
4847 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4848 response = self._extract_response(
86e5f3ed 4849 item_id=f'{item_id} page {page_num}',
fe93e2c4 4850 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4851 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4852
4853 if not response:
8bdd16b4 4854 break
ac56cf38 4855 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4856 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4857 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4858
a1b535bd 4859 known_renderers = {
e4b98809 4860 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4861 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4862 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4863 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4864 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4865 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4866 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
0a5095fe 4867 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
4868 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
1fb53b94 4869 'playlistVideoListContinuation': (self._playlist_entries, None),
4870 'gridContinuation': (self._grid_entries, None),
4871 'itemSectionContinuation': (self._post_thread_continuation_entries, None),
4872 'sectionListContinuation': (extract_entries, None), # for feeds
a1b535bd 4873 }
1fb53b94 4874
4875 continuation_items = traverse_obj(response, (
4876 ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
4877 'appendContinuationItemsAction', 'continuationItems'
4878 ), 'continuationContents', get_all=False)
4879 continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
4880
a1b535bd 4881 video_items_renderer = None
1fb53b94 4882 for key in continuation_item.keys():
a1b535bd 4883 if key not in known_renderers:
8bdd16b4 4884 continue
1fb53b94 4885 func, parent_key = known_renderers[key]
4886 video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
9ba5705a 4887 continuation_list = [None]
1fb53b94 4888 yield from func(video_items_renderer)
9ba5705a 4889 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
1fb53b94 4890
4891 if not video_items_renderer:
a1b535bd 4892 break
9558dcec 4893
8bdd16b4 4894 @staticmethod
7c219ea6 4895 def _extract_selected_tab(tabs, fatal=True):
86973308
M
4896 for tab_renderer in tabs:
4897 if tab_renderer.get('selected'):
4898 return tab_renderer
4899 if fatal:
4900 raise ExtractorError('Unable to find selected tab')
4901
@staticmethod
def _extract_tab_renderers(response):
    """Collect the (expandable) tab renderer dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
b82f815f 4906
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Return a playlist result for the selected tab of `tabs`.

    The playlist title is the page title followed by the selected tab's
    'title' and 'expandedText', each prefixed with ' - ' when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    title_suffix = ''.join(
        format_field(selected_tab, field, ' - %s') for field in ('title', 'expandedText'))
    metadata['title'] += title_suffix

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
39ed931e 4920
# Collect the page-level metadata (ids, title, channel/uploader fields,
# thumbnails, counts, dates) of a channel or playlist page into one dict.
# The dict starts as {'id': item_id}; the id is replaced by the channel id
# when the page has channel metadata with a usable channel id.
bd7e919a 4921 def _extract_metadata_from_tabs(self, item_id, data):
4922 info = {'id': item_id}
4923
# Channel metadata is tried first; playlist metadata is the fallback
4924 metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
6141346d 4925 if metadata_renderer:
7666b936 4926 channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
4823ec9f 4927 ('channelUrl', {self.ucid_from_url}))
bd7e919a 4928 info.update({
7666b936 4929 'channel': metadata_renderer.get('title'),
4930 'channel_id': channel_id,
bd7e919a 4931 })
7666b936 4932 if info['channel_id']:
4933 info['id'] = info['channel_id']
bd7e919a 4934 else:
4935 metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
b60419c5 4936
301d07fc 4937 # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
4938 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
4939 def _get_uncropped(url):
# Everything from the first '=' on is replaced by '=s0'
4940 return url_or_none((url or '').split('=')[0] + '=s0')
4941
# The uncropped variant is derived from the first avatar thumbnail only
6141346d 4942 avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
301d07fc 4943 if avatar_thumbnails:
4944 uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
4945 if uncropped_avatar:
4946 avatar_thumbnails.append({
4947 'url': uncropped_avatar,
4948 'id': 'avatar_uncropped',
4949 'preference': 1
4950 })
4951
# Banners get preference -10, the uncropped banner -5, the uncropped avatar 1
4952 channel_banners = self._extract_thumbnails(
bd7e919a 4953 data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
301d07fc 4954 for banner in channel_banners:
4955 banner['preference'] = -10
4956
4957 if channel_banners:
4958 uncropped_banner = _get_uncropped(channel_banners[0]['url'])
4959 if uncropped_banner:
4960 channel_banners.append({
4961 'url': uncropped_banner,
4962 'id': 'banner_uncropped',
4963 'preference': -5
4964 })
4965
bd7e919a 4966 # Deprecated - remove primary_sidebar_renderer when layout discontinued
4967 primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4968 playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
4969
301d07fc 4970 primary_thumbnails = self._extract_thumbnails(
a17526e4 4971 primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
6141346d
M
4972 playlist_thumbnails = self._extract_thumbnails(
4973 playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
4974
# Title fallback order: metadata title, hashtag header text, then the id.
# Sidebar thumbnails, when present, are used instead of the header ones.
bd7e919a 4975 info.update({
4976 'title': (traverse_obj(metadata_renderer, 'title')
4977 or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
4978 or info['id']),
4979 'availability': self._extract_availability(data),
4980 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
4981 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
4982 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
4983 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
4984 })
f0d785d3 4985
# The handle is taken from the metadata URLs first, then from the header text
7666b936 4986 channel_handle = (
4987 traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
4988 or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
4989
4990 if channel_handle:
4991 info.update({
4992 'uploader_id': channel_handle,
4993 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4994 })
6141346d
M
4995 # Playlist stats is a text runs array containing [video count, view count, last updated].
4996 # last updated or (view count and last updated) may be missing.
4997 playlist_stats = get_first(
bd7e919a 4998 (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
4999
6141346d
M
5000 last_updated_unix = self._parse_time_text(
5001 self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
5002 or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
bd7e919a 5003 info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
6141346d 5004
# view_count fallback chain: playlist stats, playlist header, then the
# channelAboutFullMetadataRenderer found inside the tabs
bd7e919a 5005 info['view_count'] = self._get_count(playlist_stats, 1)
5006 if info['view_count'] is None: # 0 is allowed
5007 info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
31e18355 5008 if info['view_count'] is None:
5009 info['view_count'] = self._get_count(data, (
5010 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
5011 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
bd7e919a 5012
5013 info['playlist_count'] = self._get_count(playlist_stats, 0)
5014 if info['playlist_count'] is None: # 0 is allowed
5015 info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
5016
# Without a channel id so far, the channel fields are derived from the
# playlist owner (header first, deprecated sidebar second)
7666b936 5017 if not info.get('channel_id'):
6141346d 5018 owner = traverse_obj(playlist_header_renderer, 'ownerText')
bd7e919a 5019 if not owner: # Deprecated
6141346d
M
5020 owner = traverse_obj(
5021 self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
5022 ('videoOwner', 'videoOwnerRenderer', 'title'))
5023 owner_text = self._get_text(owner)
5024 browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
# An owner text of the form "by <name> and <N> other(s)" is reduced to <name>;
# any other text is used as-is
bd7e919a 5025 info.update({
7666b936 5026 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
5027 'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
5028 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
bd7e919a 5029 })
6141346d 5030
# 'uploader' mirrors 'channel'; the URLs are rebuilt from the final ids
# and are None when the corresponding id is missing
bd7e919a 5031 info.update({
7666b936 5032 'uploader': info['channel'],
5033 'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
5034 'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
bd7e919a 5035 })
7666b936 5036
bd7e919a 5037 return info
73c4ac2c 5038
6e634cbe 5039 def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
ac56cf38 5040 first_id = last_id = response = None
2be71994 5041 for page_num in itertools.count(1):
cd7c66cf 5042 videos = list(self._playlist_entries(playlist))
5043 if not videos:
5044 return
2be71994 5045 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
5046 if start >= len(videos):
5047 return
24146491 5048 yield from videos[start:]
2be71994 5049 first_id = first_id or videos[0]['id']
5050 last_id = videos[-1]['id']
79360d99 5051 watch_endpoint = try_get(
5052 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
ac56cf38 5053 headers = self.generate_api_headers(
5054 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
5055 visitor_data=self._extract_visitor_data(response, data, ytcfg))
79360d99 5056 query = {
5057 'playlistId': playlist_id,
5058 'videoId': watch_endpoint.get('videoId') or last_id,
5059 'index': watch_endpoint.get('index') or len(videos),
5060 'params': watch_endpoint.get('params') or 'OAE%3D'
5061 }
5062 response = self._extract_response(
5063 item_id='%s page %d' % (playlist_id, page_num),
fe93e2c4 5064 query=query, ep='next', headers=headers, ytcfg=ytcfg,
79360d99 5065 check_get_keys='contents'
5066 )
cd7c66cf 5067 playlist = try_get(
79360d99 5068 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 5069
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist shown next to a video.

    Playlists that link to a different, viewable playlist page are delegated
    to that page as a url result; the rest are extracted inline.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    must_extract_inline = not playlist_url or playlist_url == url or is_known_unviewable
    if must_extract_inline:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 5092
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' if a public badge, the header privacy or the selected privacy
             icon says so; otherwise the result of self._availability() for the
             private/unlisted/subscription/premium flags gathered here
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # Privacy as reported by the non-badge sources, in order of preference:
    # playlist header, then the selected dropdown entry, then (for "unlisted" only)
    # the microformat. None means the source chain gave no answer.
    if player_header_privacy is not None:
        reported_private = player_header_privacy == 'PRIVATE'
        reported_unlisted = player_header_privacy == 'UNLISTED'
    elif privacy_setting_icon is not None:
        reported_private = privacy_setting_icon == 'PRIVACY_PRIVATE'
        reported_unlisted = privacy_setting_icon == 'PRIVACY_UNLISTED'
    else:
        reported_private = None
        reported_unlisted = microformats_is_unlisted

    # The badge is checked unconditionally. Previously the expression was written as
    # `badge or header == X if header is not None else ...`, which Python parses as
    # `(badge or header == X) if header is not None else ...`, so a PRIVATE/UNLISTED
    # badge was silently ignored whenever the header carried no privacy value.
    return self._availability(
        is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or reported_private,
        is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or reported_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 5133
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find a renderer among the playlist sidebar items.
    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: passed to try_get for the looked-up value
    @returns the first truthy match, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazy on purpose: stop looking as soon as one item yields a renderer
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)
5142
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Does nothing (returns None) unless `data` has playlist metadata or a playlist header.
    The request is non-fatal.
    """
    playlist_renderer = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_renderer:
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5162
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of the tab extractor"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5166
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, driven by self.RetryManager.

    Network errors other than HTTP 403/429, and pages whose initial data lacks all of
    the expected top-level keys, are recorded on the retry object. Any other
    ExtractorError (including reported alerts) goes to self._error_or_warning and
    ends the loop.
    @returns (webpage, data); both start out as None
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            is_retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if is_retryable:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
5194
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    if ytcfg or not self.is_authenticated:
        # Either the webpage did give us a ytcfg, or there is no login to worry about
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
5206
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`: from the webpage unless that is skipped, and from
    the API (self._extract_tab_endpoint) when the webpage gave no data.
    @param ytcfg: used as-is if truthy; otherwise taken from the downloaded webpage
    @param fatal: whether the home-page redirect and the API fallback may raise
    @param webpage_fatal: passed as `fatal` to the webpage download
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5225
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and request the
    endpoint it resolves to.
    @returns the response of the 'browse' or 'next' request; None if the URL did not
             resolve to either and `fatal` is false
    @raises ExtractorError if the URL did not resolve and `fatal` is true
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to send its parameters to)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
5243
# Value for the search request's "params" field when the caller does not pass one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generate the entries of a search, requesting further pages for as long as a
    continuation is available.
    @param query: the search query
    @param params: value for the request's "params" field; self._SEARCH_PARAMS if not given
    @param default_client: client used for ytcfg, headers and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Places where a search response may carry its results
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({path[0] for path in content_keys})
    display_id = f'query "{query}"'

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list shared with self._extract_entries and read back after each page
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        continuation = continuation_list[0]
        if continuation:
            data.update(continuation)
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
5278
5279
5280class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5281 IE_DESC = 'YouTube Tabs'
5282 _VALID_URL = r'''(?x:
5283 https?://
b032ff0f 5284 (?!consent\.)(?:\w+\.)?
a6213a49 5285 (?:
5286 youtube(?:kids)?\.com|
5287 %(invidious)s
5288 )/
5289 (?:
5290 (?P<channel_type>channel|c|user|browse)/|
5291 (?P<not_channel>
5292 feed/|hashtag/|
5293 (?:playlist|watch)\?.*?\blist=
5294 )|
5295 (?!(?:%(reserved_names)s)\b) # Direct URLs
5296 )
5297 (?P<id>[^/?\#&]+)
5298 )''' % {
5299 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5300 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5301 }
5302 IE_NAME = 'youtube:tab'
5303
5304 _TESTS = [{
5305 'note': 'playlists, multipage',
5306 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5307 'playlist_mincount': 94,
5308 'info_dict': {
5309 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5310 'title': 'Igor Kleiner - Playlists',
a6213a49 5311 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5312 'uploader': 'Igor Kleiner',
7666b936 5313 'uploader_id': '@IgorDataScience',
5314 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
976ae3ea 5315 'channel': 'Igor Kleiner',
5316 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5317 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5318 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5319 'channel_follower_count': int
a6213a49 5320 },
5321 }, {
5322 'note': 'playlists, multipage, different order',
5323 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5324 'playlist_mincount': 94,
5325 'info_dict': {
5326 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5327 'title': 'Igor Kleiner - Playlists',
a6213a49 5328 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5329 'uploader': 'Igor Kleiner',
7666b936 5330 'uploader_id': '@IgorDataScience',
5331 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
976ae3ea 5332 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5333 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5334 'channel': 'Igor Kleiner',
5335 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5336 'channel_follower_count': int
a6213a49 5337 },
5338 }, {
5339 'note': 'playlists, series',
5340 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5341 'playlist_mincount': 5,
5342 'info_dict': {
5343 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5344 'title': '3Blue1Brown - Playlists',
5345 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
976ae3ea 5346 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5347 'channel': '3Blue1Brown',
5348 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5349 'uploader_id': '@3blue1brown',
5350 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5351 'uploader': '3Blue1Brown',
976ae3ea 5352 'tags': ['Mathematics'],
6c73052c 5353 'channel_follower_count': int
a6213a49 5354 },
5355 }, {
5356 'note': 'playlists, singlepage',
5357 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5358 'playlist_mincount': 4,
5359 'info_dict': {
5360 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5361 'title': 'ThirstForScience - Playlists',
5362 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5363 'uploader': 'ThirstForScience',
7666b936 5364 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5365 'uploader_id': '@ThirstForScience',
976ae3ea 5366 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
7666b936 5367 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5368 'tags': 'count:13',
5369 'channel': 'ThirstForScience',
6c73052c 5370 'channel_follower_count': int
a6213a49 5371 }
5372 }, {
5373 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5374 'only_matching': True,
5375 }, {
5376 'note': 'basic, single video playlist',
5377 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5378 'info_dict': {
a6213a49 5379 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5380 'title': 'youtube-dl public playlist',
976ae3ea 5381 'description': '',
5382 'tags': [],
5383 'view_count': int,
5384 'modified_date': '20201130',
5385 'channel': 'Sergey M.',
5386 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
976ae3ea 5387 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5388 'availability': 'public',
7666b936 5389 'uploader': 'Sergey M.',
5390 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5391 'uploader_id': '@sergeym.6173',
a6213a49 5392 },
5393 'playlist_count': 1,
5394 }, {
5395 'note': 'empty playlist',
5396 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5397 'info_dict': {
a6213a49 5398 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5399 'title': 'youtube-dl empty playlist',
976ae3ea 5400 'tags': [],
5401 'channel': 'Sergey M.',
5402 'description': '',
5403 'modified_date': '20160902',
5404 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5405 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5406 'availability': 'public',
7666b936 5407 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5408 'uploader_id': '@sergeym.6173',
5409 'uploader': 'Sergey M.',
a6213a49 5410 },
5411 'playlist_count': 0,
5412 }, {
5413 'note': 'Home tab',
5414 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5415 'info_dict': {
5416 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5417 'title': 'lex will - Home',
5418 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5419 'uploader': 'lex will',
7666b936 5420 'uploader_id': '@lexwill718',
976ae3ea 5421 'channel': 'lex will',
5422 'tags': ['bible', 'history', 'prophesy'],
7666b936 5423 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5424 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5425 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5426 'channel_follower_count': int
a6213a49 5427 },
5428 'playlist_mincount': 2,
5429 }, {
5430 'note': 'Videos tab',
5431 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5432 'info_dict': {
5433 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5434 'title': 'lex will - Videos',
5435 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5436 'uploader': 'lex will',
7666b936 5437 'uploader_id': '@lexwill718',
976ae3ea 5438 'tags': ['bible', 'history', 'prophesy'],
5439 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5440 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5441 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5442 'channel': 'lex will',
6c73052c 5443 'channel_follower_count': int
a6213a49 5444 },
5445 'playlist_mincount': 975,
5446 }, {
5447 'note': 'Videos tab, sorted by popular',
5448 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5449 'info_dict': {
5450 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5451 'title': 'lex will - Videos',
5452 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5453 'uploader': 'lex will',
7666b936 5454 'uploader_id': '@lexwill718',
976ae3ea 5455 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5456 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5457 'channel': 'lex will',
5458 'tags': ['bible', 'history', 'prophesy'],
5459 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5460 'channel_follower_count': int
a6213a49 5461 },
5462 'playlist_mincount': 199,
5463 }, {
5464 'note': 'Playlists tab',
5465 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5466 'info_dict': {
5467 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5468 'title': 'lex will - Playlists',
5469 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5470 'uploader': 'lex will',
7666b936 5471 'uploader_id': '@lexwill718',
5472 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5473 'channel': 'lex will',
5474 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5475 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5476 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5477 'channel_follower_count': int
a6213a49 5478 },
5479 'playlist_mincount': 17,
5480 }, {
5481 'note': 'Community tab',
5482 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5483 'info_dict': {
5484 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5485 'title': 'lex will - Community',
5486 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5487 'channel': 'lex will',
5488 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5489 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5490 'tags': ['bible', 'history', 'prophesy'],
7666b936 5491 'channel_follower_count': int,
5492 'uploader_url': 'https://www.youtube.com/@lexwill718',
5493 'uploader_id': '@lexwill718',
5494 'uploader': 'lex will',
a6213a49 5495 },
5496 'playlist_mincount': 18,
5497 }, {
5498 'note': 'Channels tab',
5499 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5500 'info_dict': {
5501 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5502 'title': 'lex will - Channels',
5503 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5504 'channel': 'lex will',
5505 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5506 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5507 'tags': ['bible', 'history', 'prophesy'],
7666b936 5508 'channel_follower_count': int,
5509 'uploader_url': 'https://www.youtube.com/@lexwill718',
5510 'uploader_id': '@lexwill718',
5511 'uploader': 'lex will',
a6213a49 5512 },
5513 'playlist_mincount': 12,
5514 }, {
5515 'note': 'Search tab',
5516 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5517 'playlist_mincount': 40,
5518 'info_dict': {
5519 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5520 'title': '3Blue1Brown - Search - linear algebra',
5521 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
976ae3ea 5522 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5523 'tags': ['Mathematics'],
5524 'channel': '3Blue1Brown',
5525 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5526 'channel_follower_count': int,
5527 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5528 'uploader_id': '@3blue1brown',
5529 'uploader': '3Blue1Brown',
a6213a49 5530 },
5531 }, {
5532 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5533 'only_matching': True,
5534 }, {
5535 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5536 'only_matching': True,
5537 }, {
5538 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5539 'only_matching': True,
5540 }, {
5541 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5542 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5543 'info_dict': {
5544 'title': '29C3: Not my department',
5545 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
a6213a49 5546 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5547 'tags': [],
976ae3ea 5548 'view_count': int,
5549 'modified_date': '20150605',
5550 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
7666b936 5551 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
976ae3ea 5552 'channel': 'Christiaan008',
c26f9b99 5553 'availability': 'public',
7666b936 5554 'uploader_id': '@ChRiStIaAn008',
5555 'uploader': 'Christiaan008',
5556 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
a6213a49 5557 },
5558 'playlist_count': 96,
5559 }, {
5560 'note': 'Large playlist',
5561 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5562 'info_dict': {
5563 'title': 'Uploads from Cauchemar',
5564 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
7666b936 5565 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5566 'tags': [],
5567 'modified_date': r're:\d{8}',
5568 'channel': 'Cauchemar',
976ae3ea 5569 'view_count': int,
5570 'description': '',
5571 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5572 'availability': 'public',
7666b936 5573 'uploader_id': '@Cauchemar89',
5574 'uploader': 'Cauchemar',
5575 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
a6213a49 5576 },
5577 'playlist_mincount': 1123,
976ae3ea 5578 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5579 }, {
5580 'note': 'even larger playlist, 8832 videos',
5581 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5582 'only_matching': True,
5583 }, {
5584 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5585 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5586 'info_dict': {
5587 'title': 'Uploads from Interstellar Movie',
5588 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5589 'tags': [],
5590 'view_count': int,
5591 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
7666b936 5592 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5593 'channel': 'Interstellar Movie',
5594 'description': '',
5595 'modified_date': r're:\d{8}',
c26f9b99 5596 'availability': 'public',
7666b936 5597 'uploader_id': '@InterstellarMovie',
5598 'uploader': 'Interstellar Movie',
5599 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
a6213a49 5600 },
5601 'playlist_mincount': 21,
5602 }, {
5603 'note': 'Playlist with "show unavailable videos" button',
5604 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5605 'info_dict': {
5606 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5607 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5608 'view_count': int,
5609 'channel': 'Phim Siêu Nhân Nhật Bản',
5610 'tags': [],
976ae3ea 5611 'description': '',
5612 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5613 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5614 'modified_date': r're:\d{8}',
c26f9b99 5615 'availability': 'public',
7666b936 5616 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5617 'uploader_id': '@phimsieunhannhatban',
5618 'uploader': 'Phim Siêu Nhân Nhật Bản',
a6213a49 5619 },
5620 'playlist_mincount': 200,
976ae3ea 5621 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5622 }, {
5623 'note': 'Playlist with unavailable videos in page 7',
5624 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5625 'info_dict': {
5626 'title': 'Uploads from BlankTV',
5627 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5628 'channel': 'BlankTV',
7666b936 5629 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5630 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5631 'view_count': int,
5632 'tags': [],
976ae3ea 5633 'modified_date': r're:\d{8}',
5634 'description': '',
c26f9b99 5635 'availability': 'public',
7666b936 5636 'uploader_id': '@blanktv',
5637 'uploader': 'BlankTV',
5638 'uploader_url': 'https://www.youtube.com/@blanktv',
a6213a49 5639 },
5640 'playlist_mincount': 1000,
976ae3ea 5641 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5642 }, {
5643 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5644 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5645 'info_dict': {
5646 'title': 'Data Analysis with Dr Mike Pound',
5647 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
a6213a49 5648 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5649 'tags': [],
5650 'view_count': int,
5651 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
7666b936 5652 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
976ae3ea 5653 'channel': 'Computerphile',
c26f9b99 5654 'availability': 'public',
6141346d 5655 'modified_date': '20190712',
7666b936 5656 'uploader_id': '@Computerphile',
5657 'uploader': 'Computerphile',
5658 'uploader_url': 'https://www.youtube.com/@Computerphile',
a6213a49 5659 },
5660 'playlist_mincount': 11,
5661 }, {
5662 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5663 'only_matching': True,
5664 }, {
5665 'note': 'Playlist URL that does not actually serve a playlist',
5666 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5667 'info_dict': {
5668 'id': 'FqZTN594JQw',
5669 'ext': 'webm',
5670 'title': "Smiley's People 01 detective, Adventure Series, Action",
a6213a49 5671 'upload_date': '20150526',
5672 'license': 'Standard YouTube License',
5673 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5674 'categories': ['People & Blogs'],
5675 'tags': list,
5676 'view_count': int,
5677 'like_count': int,
a6213a49 5678 },
5679 'params': {
5680 'skip_download': True,
5681 },
5682 'skip': 'This video is not available.',
5683 'add_ie': [YoutubeIE.ie_key()],
5684 }, {
5685 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5686 'only_matching': True,
5687 }, {
5688 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5689 'only_matching': True,
5690 }, {
5691 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5692 'info_dict': {
7666b936 5693 'id': 'AlTsmyW4auo', # This will keep changing
a6213a49 5694 'ext': 'mp4',
976ae3ea 5695 'title': str,
a6213a49 5696 'upload_date': r're:\d{8}',
976ae3ea 5697 'description': str,
a6213a49 5698 'categories': ['News & Politics'],
5699 'tags': list,
5700 'like_count': int,
86973308 5701 'release_timestamp': int,
976ae3ea 5702 'channel': 'Sky News',
5703 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5704 'age_limit': 0,
5705 'view_count': int,
86973308 5706 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5707 'playable_in_embed': True,
86973308 5708 'release_date': r're:\d+',
976ae3ea 5709 'availability': 'public',
5710 'live_status': 'is_live',
5711 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5712 'channel_follower_count': int,
5713 'concurrent_view_count': int,
7666b936 5714 'uploader_url': 'https://www.youtube.com/@SkyNews',
5715 'uploader_id': '@SkyNews',
5716 'uploader': 'Sky News',
a6213a49 5717 },
5718 'params': {
5719 'skip_download': True,
5720 },
976ae3ea 5721 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5722 }, {
5723 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5724 'info_dict': {
5725 'id': 'a48o2S1cPoo',
5726 'ext': 'mp4',
5727 'title': 'The Young Turks - Live Main Show',
a6213a49 5728 'upload_date': '20150715',
5729 'license': 'Standard YouTube License',
5730 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5731 'categories': ['News & Politics'],
5732 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5733 'like_count': int,
a6213a49 5734 },
5735 'params': {
5736 'skip_download': True,
5737 },
5738 'only_matching': True,
5739 }, {
5740 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5741 'only_matching': True,
5742 }, {
5743 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5744 'only_matching': True,
5745 }, {
5746 'note': 'A channel that is not live. Should raise error',
5747 'url': 'https://www.youtube.com/user/numberphile/live',
5748 'only_matching': True,
5749 }, {
5750 'url': 'https://www.youtube.com/feed/trending',
5751 'only_matching': True,
5752 }, {
5753 'url': 'https://www.youtube.com/feed/library',
5754 'only_matching': True,
5755 }, {
5756 'url': 'https://www.youtube.com/feed/history',
5757 'only_matching': True,
5758 }, {
5759 'url': 'https://www.youtube.com/feed/subscriptions',
5760 'only_matching': True,
5761 }, {
5762 'url': 'https://www.youtube.com/feed/watch_later',
5763 'only_matching': True,
5764 }, {
5765 'note': 'Recommended - redirects to home page.',
5766 'url': 'https://www.youtube.com/feed/recommended',
5767 'only_matching': True,
5768 }, {
5769 'note': 'inline playlist with not always working continuations',
5770 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5771 'only_matching': True,
5772 }, {
5773 'url': 'https://www.youtube.com/course',
5774 'only_matching': True,
5775 }, {
5776 'url': 'https://www.youtube.com/zsecurity',
5777 'only_matching': True,
5778 }, {
5779 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5780 'only_matching': True,
5781 }, {
5782 'url': 'https://www.youtube.com/TheYoungTurks/live',
5783 'only_matching': True,
5784 }, {
5785 'url': 'https://www.youtube.com/hashtag/cctv9',
5786 'info_dict': {
5787 'id': 'cctv9',
5788 'title': '#cctv9',
976ae3ea 5789 'tags': [],
a6213a49 5790 },
4dc23a80 5791 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5792 }, {
5793 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5794 'only_matching': True,
5795 }, {
5796 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5797 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5798 'only_matching': True
5799 }, {
5800 'note': '/browse/ should redirect to /channel/',
5801 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5802 'only_matching': True
5803 }, {
5804 'note': 'VLPL, should redirect to playlist?list=PL...',
5805 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5806 'info_dict': {
5807 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
a6213a49 5808 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
12a1b225 5809 'title': 'NCS : All Releases 💿',
7666b936 5810 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
976ae3ea 5811 'modified_date': r're:\d{8}',
5812 'view_count': int,
5813 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5814 'tags': [],
5815 'channel': 'NoCopyrightSounds',
c26f9b99 5816 'availability': 'public',
7666b936 5817 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
5818 'uploader': 'NoCopyrightSounds',
5819 'uploader_id': '@NoCopyrightSounds',
a6213a49 5820 },
5821 'playlist_mincount': 166,
7666b936 5822 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
a6213a49 5823 }, {
7666b936 5824 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
a6213a49 5825 'note': 'Topic, should redirect to playlist?list=UU...',
5826 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5827 'info_dict': {
5828 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5829 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 5830 'tags': [],
5831 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5832 'channel': 'Royalty Free Music - Topic',
5833 'view_count': int,
5834 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
976ae3ea 5835 'modified_date': r're:\d{8}',
976ae3ea 5836 'description': '',
c26f9b99 5837 'availability': 'public',
7666b936 5838 'uploader': 'Royalty Free Music - Topic',
a6213a49 5839 },
a6213a49 5840 'playlist_mincount': 101,
7666b936 5841 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5842 }, {
86973308
M
5843 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5844 # Treat as a general feed
a6213a49 5845 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5846 'info_dict': {
5847 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5848 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5849 'tags': [],
a6213a49 5850 },
a6213a49 5851 'playlist_mincount': 9,
5852 }, {
5853 'note': 'Youtube music Album',
5854 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5855 'info_dict': {
5856 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5857 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5858 'tags': [],
5859 'view_count': int,
5860 'description': '',
5861 'availability': 'unlisted',
5862 'modified_date': r're:\d{8}',
a6213a49 5863 },
5864 'playlist_count': 50,
7666b936 5865 'expected_warnings': ['YouTube Music is not directly supported'],
a6213a49 5866 }, {
5867 'note': 'unlisted single video playlist',
5868 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5869 'info_dict': {
a6213a49 5870 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5871 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5872 'availability': 'unlisted',
5873 'tags': [],
12a1b225 5874 'modified_date': '20220418',
976ae3ea 5875 'channel': 'colethedj',
5876 'view_count': int,
5877 'description': '',
976ae3ea 5878 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5879 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
7666b936 5880 'uploader_url': 'https://www.youtube.com/@colethedj1894',
5881 'uploader_id': '@colethedj1894',
5882 'uploader': 'colethedj',
a6213a49 5883 },
93e12ed7 5884 'playlist': [{
5885 'info_dict': {
5886 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
5887 'id': 'BaW_jenozKc',
5888 '_type': 'url',
5889 'ie_key': 'Youtube',
5890 'duration': 10,
5891 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
5892 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
5893 'view_count': int,
5894 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
5895 'channel': 'Philipp Hagemeister',
5896 'uploader_id': '@PhilippHagemeister',
5897 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
5898 'uploader': 'Philipp Hagemeister',
5899 }
5900 }],
a6213a49 5901 'playlist_count': 1,
93e12ed7 5902 'params': {'extract_flat': True},
a6213a49 5903 }, {
5904 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5905 'url': 'https://www.youtube.com/feed/recommended',
5906 'info_dict': {
5907 'id': 'recommended',
5908 'title': 'recommended',
6c73052c 5909 'tags': [],
a6213a49 5910 },
5911 'playlist_mincount': 50,
5912 'params': {
5913 'skip_download': True,
5914 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5915 },
5916 }, {
5917 'note': 'API Fallback: /videos tab, sorted by oldest first',
5918 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5919 'info_dict': {
5920 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5921 'title': 'Cody\'sLab - Videos',
5922 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
976ae3ea 5923 'channel': 'Cody\'sLab',
5924 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5925 'tags': [],
5926 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5927 'channel_follower_count': int
a6213a49 5928 },
5929 'playlist_mincount': 650,
5930 'params': {
5931 'skip_download': True,
5932 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5933 },
86973308 5934 'skip': 'Query for sorting no longer works',
a6213a49 5935 }, {
5936 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5937 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5938 'info_dict': {
5939 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5940 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 5941 'modified_date': r're:\d{8}',
5942 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5943 'description': '',
5944 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5945 'tags': [],
5946 'channel': 'Royalty Free Music - Topic',
5947 'view_count': int,
c26f9b99 5948 'availability': 'public',
7666b936 5949 'uploader': 'Royalty Free Music - Topic',
a6213a49 5950 },
a6213a49 5951 'playlist_mincount': 101,
5952 'params': {
5953 'skip_download': True,
5954 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5955 },
7666b936 5956 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
7c219ea6 5957 }, {
5958 'note': 'non-standard redirect to regional channel',
5959 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5960 'only_matching': True
61d3665d 5961 }, {
5962 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5963 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5964 'info_dict': {
5965 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5966 'modified_date': '20220407',
5967 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5968 'tags': [],
61d3665d 5969 'availability': 'unlisted',
5970 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5971 'channel': 'pukkandan',
5972 'description': 'Test for collaborative playlist',
5973 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5974 'view_count': int,
7666b936 5975 'uploader_url': 'https://www.youtube.com/@pukkandan',
5976 'uploader_id': '@pukkandan',
5977 'uploader': 'pukkandan',
61d3665d 5978 },
5979 'playlist_mincount': 2
c26f9b99 5980 }, {
5981 'note': 'translated tab name',
5982 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5983 'info_dict': {
5984 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5985 'tags': [],
c26f9b99 5986 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 5987 'description': 'test description',
c26f9b99 5988 'title': 'cole-dlp-test-acc - 再生リスト',
c26f9b99 5989 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5990 'channel': 'cole-dlp-test-acc',
7666b936 5991 'uploader_url': 'https://www.youtube.com/@coletdjnz',
5992 'uploader_id': '@coletdjnz',
5993 'uploader': 'cole-dlp-test-acc',
c26f9b99 5994 },
5995 'playlist_mincount': 1,
5996 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5997 'expected_warnings': ['Preferring "ja"'],
5998 }, {
5999 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6000 'note': 'preferred lang set with playlist with translated video titles',
6001 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6002 'info_dict': {
6003 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6004 'tags': [],
6005 'view_count': int,
6006 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
c26f9b99 6007 'channel': 'cole-dlp-test-acc',
6008 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6009 'description': 'test',
c26f9b99 6010 'title': 'dlp test playlist',
6011 'availability': 'public',
7666b936 6012 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6013 'uploader_id': '@coletdjnz',
6014 'uploader': 'cole-dlp-test-acc',
c26f9b99 6015 },
6016 'playlist_mincount': 1,
6017 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6018 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 6019 }, {
6020 # shorts audio pivot for 2GtVksBMYFM.
6021 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6022 'info_dict': {
6023 'id': 'sfv_audio_pivot',
6024 'title': 'sfv_audio_pivot',
6025 'tags': [],
6026 },
6027 'playlist_mincount': 50,
6028
86973308
M
6029 }, {
6030 # Channel with a real live tab (not to be mistaken with streams tab)
6031 # Do not treat like it should redirect to live stream
6032 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6033 'info_dict': {
6034 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6035 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6036 'tags': [],
6037 },
6038 'playlist_mincount': 20,
6039 }, {
6040 # Tab name is not the same as tab id
6041 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6042 'info_dict': {
6043 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6044 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6045 'tags': [],
6046 },
6047 'playlist_mincount': 8,
6048 }, {
6049 # Home tab id is literally home. Not to get mistaken with featured
6050 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6051 'info_dict': {
6052 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6053 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6054 'tags': [],
6055 },
6056 'playlist_mincount': 8,
6057 }, {
6058 # Should get three playlists for videos, shorts and streams tabs
6059 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6060 'info_dict': {
6061 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 6062 'title': 'Polka Ch. 尾丸ポルカ',
6063 'channel_follower_count': int,
6064 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6065 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
7666b936 6066 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
bd7e919a 6067 'channel': 'Polka Ch. 尾丸ポルカ',
6068 'tags': 'count:35',
7666b936 6069 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6070 'uploader': 'Polka Ch. 尾丸ポルカ',
6071 'uploader_id': '@OmaruPolka',
86973308
M
6072 },
6073 'playlist_count': 3,
6074 }, {
6075 # Shorts tab with channel with handle
7666b936 6076 # TODO: fix channel description
86973308
M
6077 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6078 'info_dict': {
6079 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6080 'title': 'Not Just Bikes - Shorts',
6081 'tags': 'count:12',
86973308 6082 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
7666b936 6083 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
86973308 6084 'channel_follower_count': int,
86973308 6085 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
86973308 6086 'channel': 'Not Just Bikes',
7666b936 6087 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6088 'uploader': 'Not Just Bikes',
6089 'uploader_id': '@NotJustBikes',
86973308
M
6090 },
6091 'playlist_mincount': 10,
6092 }, {
6093 # Streams tab
6094 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6095 'info_dict': {
6096 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6097 'title': '中村悠一 - Live',
6098 'tags': 'count:7',
6099 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6100 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
86973308 6101 'channel': '中村悠一',
86973308 6102 'channel_follower_count': int,
86973308 6103 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
7666b936 6104 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6105 'uploader_id': '@Yuichi-Nakamura',
6106 'uploader': '中村悠一',
86973308
M
6107 },
6108 'playlist_mincount': 60,
6109 }, {
6110 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6111 # See test_youtube_lists
6112 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6113 'only_matching': True,
6114 }, {
6115 # No uploads and no UCID given. Should fail with no uploads error
6116 # See test_youtube_lists
6117 'url': 'https://www.youtube.com/news',
6118 'only_matching': True
6119 }, {
6120 # No videos tab but has a shorts tab
6121 'url': 'https://www.youtube.com/c/TKFShorts',
6122 'info_dict': {
6123 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6124 'title': 'Shorts Break - Shorts',
7666b936 6125 'tags': 'count:48',
86973308
M
6126 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6127 'channel': 'Shorts Break',
7666b936 6128 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
86973308 6129 'channel_follower_count': int,
86973308 6130 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
7666b936 6131 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6132 'uploader': 'Shorts Break',
6133 'uploader_id': '@ShortsBreak_Official',
86973308
M
6134 },
6135 'playlist_mincount': 30,
6136 }, {
6137 # Trending Now Tab. tab id is empty
6138 'url': 'https://www.youtube.com/feed/trending',
6139 'info_dict': {
6140 'id': 'trending',
6141 'title': 'trending - Now',
6142 'tags': [],
6143 },
6144 'playlist_mincount': 30,
6145 }, {
6146 # Trending Gaming Tab. tab id is empty
6147 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6148 'info_dict': {
6149 'id': 'trending',
6150 'title': 'trending - Gaming',
6151 'tags': [],
6152 },
6153 'playlist_mincount': 30,
4dc23a80
M
6154 }, {
6155 # Shorts url result in shorts tab
7666b936 6156 # TODO: Fix channel id extraction
4dc23a80
M
6157 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6158 'info_dict': {
6159 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6160 'title': 'cole-dlp-test-acc - Shorts',
4dc23a80 6161 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6162 'description': 'test description',
6163 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6164 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6165 'tags': [],
7666b936 6166 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6167 'uploader_id': '@coletdjnz',
4dc23a80 6168 'uploader': 'cole-dlp-test-acc',
4dc23a80
M
6169 },
6170 'playlist': [{
6171 'info_dict': {
7666b936 6172 # Channel data is not currently available for short renderers (as of 2023-03-01)
4dc23a80
M
6173 '_type': 'url',
6174 'ie_key': 'Youtube',
6175 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6176 'id': 'sSM9J5YH_60',
4dc23a80 6177 'title': 'SHORT short',
4dc23a80
M
6178 'view_count': int,
6179 'thumbnails': list,
6180 }
6181 }],
6182 'params': {'extract_flat': True},
6183 }, {
6184 # Live video status should be extracted
6185 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6186 'info_dict': {
6187 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6188 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6189 'tags': []
6190 },
6191 'playlist': [{
6192 'info_dict': {
6193 '_type': 'url',
6194 'ie_key': 'Youtube',
6195 'url': 'startswith:https://www.youtube.com/watch?v=',
6196 'id': str,
6197 'title': str,
6198 'live_status': 'is_live',
6199 'channel_id': str,
6200 'channel_url': str,
6201 'concurrent_view_count': int,
6202 'channel': str,
93e12ed7 6203 'uploader': str,
6204 'uploader_url': str,
6205 'uploader_id': str
4dc23a80
M
6206 }
6207 }],
c7335551 6208 'params': {'extract_flat': True, 'playlist_items': '1'},
4dc23a80 6209 'playlist_mincount': 1
c7335551
M
6210 }, {
6211 # Channel renderer metadata. Contains number of videos on the channel
6212 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6213 'info_dict': {
6214 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6215 'title': 'cole-dlp-test-acc - Channels',
c7335551
M
6216 'channel': 'cole-dlp-test-acc',
6217 'description': 'test description',
6218 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6219 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6220 'tags': [],
7666b936 6221 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6222 'uploader_id': '@coletdjnz',
c7335551 6223 'uploader': 'cole-dlp-test-acc',
c7335551
M
6224 },
6225 'playlist': [{
6226 'info_dict': {
6227 '_type': 'url',
6228 'ie_key': 'YoutubeTab',
6229 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6230 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6231 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6232 'title': 'PewDiePie',
6233 'channel': 'PewDiePie',
6234 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6235 'thumbnails': list,
6236 'channel_follower_count': int,
7666b936 6237 'playlist_count': int,
6238 'uploader': 'PewDiePie',
6239 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6240 'uploader_id': '@PewDiePie',
c7335551
M
6241 }
6242 }],
6243 'params': {'extract_flat': True},
31e18355 6244 }, {
6245 'url': 'https://www.youtube.com/@3blue1brown/about',
6246 'info_dict': {
6247 'id': 'UCYO_jab_esuFRV4b17AJtAw',
6248 'tags': ['Mathematics'],
6249 'title': '3Blue1Brown - About',
31e18355 6250 'channel_follower_count': int,
6251 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
31e18355 6252 'channel': '3Blue1Brown',
31e18355 6253 'view_count': int,
6254 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6255 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
7666b936 6256 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6257 'uploader_id': '@3blue1brown',
6258 'uploader': '3Blue1Brown',
31e18355 6259 },
6260 'playlist_count': 0,
447afb9e 6261 }, {
6262 # Podcasts tab, with rich entry playlistRenderers
6263 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6264 'info_dict': {
6265 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6266 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6267 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6268 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6269 'title': '99 Percent Invisible - Podcasts',
6270 'uploader': '99 Percent Invisible',
6271 'channel_follower_count': int,
6272 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6273 'tags': [],
6274 'channel': '99 Percent Invisible',
6275 'uploader_id': '@99percentinvisiblepodcast',
6276 },
6277 'playlist_count': 1,
6278 }, {
6279 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6280 'url': 'https://www.youtube.com/@AHimitsu/releases',
6281 'info_dict': {
6282 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6283 'channel': 'A Himitsu',
6284 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6285 'title': 'A Himitsu - Releases',
6286 'uploader_id': '@AHimitsu',
6287 'uploader': 'A Himitsu',
6288 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6289 'tags': 'count:16',
6290 'description': 'I make music',
6291 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6292 'channel_follower_count': int,
6293 },
6294 'playlist_mincount': 10,
a6213a49 6295 }]
6296
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE claims are always rejected here; for every other
    URL the decision is left to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6300
86973308
M
# Splits a URL into named parts, in addition to whatever groups _VALID_URL itself defines:
#   pre  - the portion matched by _VALID_URL
#   tab  - one optional path segment, including its leading '/'; it is only attempted
#          when the `not_channel` group did not take part in the match
#   post - everything that is left over
# `not_channel` is not defined in this pattern, so it has to be a named group of _VALID_URL.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6302
6303 def _get_url_mobj(self, url):
6304 mobj = self._URL_RE.match(url).groupdict()
6305 mobj.update((k, '') for k, v in mobj.items() if v is None)
6306 return mobj
6307
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` tuple for a tab renderer dict.

    The id is taken, in order of preference, from the ``tab`` part of the
    tab's endpoint URL (resolved against *base_url*), from the renderer's
    ``tabIdentifier``, and finally from the lower-cased tab title.
    ``tab_name`` is always the lower-cased title (``''`` when there is none).
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    # Prefer the tab segment of the endpoint URL, without its leading '/'
    tab_id = None
    if tab_url:
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {'TAB_ID_SPONSORSHIPS': 'membership'}
        return id_aliases.get(tab_id, tab_id), tab_name

    # No tab id could be found, so the tab name is used in its place.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that with a translated tab name this may yield an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {'home': 'featured', 'live': 'streams'}
    return name_aliases.get(tab_name, tab_name), tab_name
6329
6330 def _has_tab(self, tabs, tab_id):
6331 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 6332
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, channel, playlist or feed URL.

    Depending on what the page turns out to be, this returns:
      * a ``url_result`` redirecting to another YoutubeTabIE or YoutubeIE URL,
      * a single playlist built from the page's tabs or inline playlist, or
      * a playlist of playlists when a bare channel URL expands to several tabs.

    `smuggled_data` is supplied by the decorator (defined outside this view);
    only its ``is_music_url`` key is read here.

    Raises ExtractorError when the URL or page cannot be resolved, and
    UserNotLive when a ``/live`` tab was requested but another tab was selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, whatever domain the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # original_tab_id is the requested tab without its leading '/' ('' when no tab was given)
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel URL is fetched through its /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When the playlist is not to be downloaded, hand the single video over to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # extra_tabs collects URLs of further tabs to extract alongside the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on, extraction continues as if the uploads playlist had been requested
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # NOTE(review): data is dropped here, presumably so that only extra_tabs yield entries below - confirm
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tabs are extracted again because data may have been replaced above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs were extracted: wrap them in one outer playlist
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id reported by the page over the one in the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6484
c5e8d7af 6485
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and playlist URLs, and forwards them to YoutubeTabIE.

    No page is fetched here: the input is rewritten to a
    ``https://www.youtube.com/playlist`` URL and returned as a ``url_result``.
    """

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # The host/path prefix is optional, so a bare playlist id also matches
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Anything YoutubeTabIE accepts, and any URL carrying a non-empty
        ``v`` query parameter, is rejected; otherwise the parent class decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-scope import kept exactly as in the original; presumably it
        # keeps this method self-contained - confirm before hoisting it to module level
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a www.youtube.com playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be checked on the incoming URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query; a bare id has none, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6598
6599
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that carry a `list=` parameter by rewriting
    # them to a regular watch URL and delegating to YoutubeTabIE
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Both ids come straight out of the named groups of _VALID_URL
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist id, not the video id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6649
6650
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> to the channel's /live page
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6664
6665
class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand into a /user/<name> URL for YoutubeTabIE
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6678
b05654f0 6679
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" and its spelling variants resolve to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6697
6698
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Keyword extractor that pages through the notification menu and yields
    # one url-type entry per notification that links to a video or community post
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        # `continuation_list` is a one-element list used as an out-parameter:
        # slot 0 is cleared, then set to the last continuationItemRenderer found
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(
            response, popup_items_path, continuation_items_path, expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        # Returns an url-type result dict, or None when the notification points
        # at neither a video nor a channel post
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor arg is set
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
        else:
            timestamp = None

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        # Fetch page after page until a page comes back without a continuation
        continuation_slot = [None]
        last_response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_slot,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(last_response))
            last_response = self._extract_response(
                item_id=f'page {page_num}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(last_response, continuation_slot)
            if not continuation_slot[0]:
                return

    def _real_extract(self, url):
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6789
6790
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor for the `ytsearch` key. It defines no methods of its
    # own; only the class-level configuration below differs from its bases.
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 6804
a61fd4cf 6805
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Date-sorted counterpart of YoutubeSearchIE for the `ytsearchdate` key.
    # IE_NAME is derived from YoutubeSearchIE.IE_NAME, i.e. 'youtube:search:date'.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 6819
c9ae7b95 6820
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs: the query comes from `search_query`
    # (or `q`), and the optional `sp` parameter is passed on as search params
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        # The query doubles as both playlist id and playlist title
        return self.playlist_result(entries, query, query)
3462ffa8 6887
6888
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com search URLs. A result section can be selected
    # either with an `sp` parameter or with a "#<section name>" URL fragment
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Reverse lookup: use the section name if the params are known,
            # otherwise fall back to showing the raw params in the title
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # No `sp`: take the text between the first and second '#', if any
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or missing section name: search without a section
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 6940
6941
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Shared base for the feed extractors
    A subclass only has to override _FEED_NAME; IE_NAME and the feed URL
    are both derived from it.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor without inheriting from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6960
6961
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" resolves to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6974
6975
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'recommended' feed. Matches either the bare
    # youtube.com home page or the ":ytrec"/":ytrecommended" keyword.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value (True): this feed does not require login
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 6991
1ed5b5c9 6992
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'subscriptions' feed; the keyword regex accepts
    # ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 7004
1ed5b5c9 7005
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed; the keyword regex accepts both
    # ":ythis" and ":ythistory" (only the latter is covered by _TESTS).
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
7014
7015
class YoutubeStoriesIE(InfoExtractor):
    # Turns "ytstories:UC<suffix>" into the playlist "RLTD<suffix>"; the id
    # group of _VALID_URL captures only the part after the "UC" prefix
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        # Mark the URL as a story for the extractor that handles it
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
7030
7031
class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Maps /source/<video id>/shorts to the sfv_audio_pivot feed for that video
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the `bp` browse params for the sfv_audio_pivot feed of this video id

        The encoded video id is spliced into a fixed byte template three times;
        the result is base64-encoded and then percent-quoted.
        """
        raw_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        feed_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(feed_url, ie=YoutubeTabIE)
7054
7055
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single non-video
    # parameter (typically because an unquoted '&' was eaten by the shell)
    # and turns them into a helpful error instead of a confusing failure
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fails: a matching URL carries no video id to extract.
        # The hint names this project's own command (yt-dlp), so that the
        # suggested invocation can be copied as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
7103
7104
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its base video and returns a
    # url_transparent result carrying the clip's start/end offsets
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
        }
    }]

    def _real_extract(self, url):
        """
        Locate the clip's base video and its start/end times in the page data

        Raises ExtractorError when either the video id or the clip's loop
        command (which holds the timing information) cannot be found.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found under the clip section of the engagement panels
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard the subscripting below would fail with a bare
            # TypeError on None instead of a proper extraction error
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; sections are expressed in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 7162
7163
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Follows consent.youtube.com interstitial URLs to the page named in
    # their `continue` query parameter
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        # If `continue` is given several times, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7207
7208
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is 1-10 characters long and ends the
    # URL, and reports them as truncated instead of attempting extraction
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)