]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cleanup] Misc
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
fe93e2c4 5import datetime
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
46383212 14import sys
f8271158 15import threading
8a784c74 16import time
e0df6211 17import traceback
ac668111 18import urllib.parse
c5e8d7af 19
b05654f0 20from .common import InfoExtractor, SearchInfoExtractor
25836db6 21from .openload import PhantomJSwrapper
14f25df2 22from ..compat import functools
545cc85d 23from ..jsinterp import JSInterpreter
3d2623a8 24from ..networking.exceptions import HTTPError, network_exceptions
4bb4a188 25from ..utils import (
f8271158 26 NO_DEFAULT,
27 ExtractorError,
4d37720a 28 LazyList,
693f0600 29 UserNotLive,
720c3099 30 bug_reports_message,
82d02080 31 classproperty,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
7a32c70d 35 filter_dict,
2d30521a 36 float_or_none,
11f9be09 37 format_field,
ff91cf74 38 get_first,
dd27fd17 39 int_or_none,
641ad5d8 40 is_html,
34921b43 41 join_nonempty,
48416bc4 42 js_to_json,
94278f72 43 mimetype2ext,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
4dfbf869 49 parse_qs,
dca3ff4a 50 qualities,
3995d37d 51 remove_start,
cf7e015f 52 smuggle_url,
dbdaaa23 53 str_or_none,
c93d53f5 54 str_to_int,
f3aa3c3f 55 strftime_or_none,
7c365c21 56 traverse_obj,
556dbe7f 57 try_get,
c5e8d7af
PH
58 unescapeHTML,
59 unified_strdate,
f0d785d3 60 unified_timestamp,
cf7e015f 61 unsmuggle_url,
8bdd16b4 62 update_url_query,
21c340b8 63 url_or_none,
fe93e2c4 64 urljoin,
7c365c21 65 variadic,
c5e8d7af
PH
66)
67
c795c39f 68STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
962ffcf8 69# any clients starting with _ cannot be explicitly requested by the user
000c15a4 70INNERTUBE_CLIENTS = {
71 'web': {
72 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
73 'INNERTUBE_CONTEXT': {
74 'client': {
75 'clientName': 'WEB',
a0c830f4 76 'clientVersion': '2.20220801.00.00',
000c15a4 77 }
78 },
79 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
80 },
81 'web_embedded': {
82 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
83 'INNERTUBE_CONTEXT': {
84 'client': {
85 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 86 'clientVersion': '1.20220731.00.00',
000c15a4 87 },
88 },
89 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
90 },
91 'web_music': {
92 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
93 'INNERTUBE_HOST': 'music.youtube.com',
94 'INNERTUBE_CONTEXT': {
95 'client': {
96 'clientName': 'WEB_REMIX',
a0c830f4 97 'clientVersion': '1.20220727.01.00',
000c15a4 98 }
99 },
100 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
101 },
e7e94f2a 102 'web_creator': {
18c7683d 103 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
104 'INNERTUBE_CONTEXT': {
105 'client': {
106 'clientName': 'WEB_CREATOR',
a0c830f4 107 'clientVersion': '1.20220726.00.00',
e7e94f2a
D
108 }
109 },
110 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
111 },
000c15a4 112 'android': {
18c7683d 113 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 114 'INNERTUBE_CONTEXT': {
115 'client': {
116 'clientName': 'ANDROID',
50ac0e54 117 'clientVersion': '17.31.35',
118 'androidSdkVersion': 30,
119 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 120 }
121 },
122 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 123 'REQUIRE_JS_PLAYER': False
000c15a4 124 },
125 'android_embedded': {
18c7683d 126 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 127 'INNERTUBE_CONTEXT': {
128 'client': {
129 'clientName': 'ANDROID_EMBEDDED_PLAYER',
50ac0e54 130 'clientVersion': '17.31.35',
131 'androidSdkVersion': 30,
132 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 133 },
134 },
b6de707d 135 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
136 'REQUIRE_JS_PLAYER': False
000c15a4 137 },
138 'android_music': {
18c7683d 139 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 140 'INNERTUBE_CONTEXT': {
141 'client': {
142 'clientName': 'ANDROID_MUSIC',
a0c830f4 143 'clientVersion': '5.16.51',
50ac0e54 144 'androidSdkVersion': 30,
145 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
000c15a4 146 }
147 },
148 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 149 'REQUIRE_JS_PLAYER': False
000c15a4 150 },
e7e94f2a 151 'android_creator': {
18c7683d 152 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
50ac0e54 156 'clientVersion': '22.30.100',
157 'androidSdkVersion': 30,
158 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
e7e94f2a
D
159 },
160 },
b6de707d 161 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
162 'REQUIRE_JS_PLAYER': False
e7e94f2a 163 },
18c7683d 164 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
165 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 166 'ios': {
18c7683d 167 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 168 'INNERTUBE_CONTEXT': {
169 'client': {
170 'clientName': 'IOS',
224b5a35 171 'clientVersion': '17.33.2',
18c7683d 172 'deviceModel': 'iPhone14,3',
224b5a35 173 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 174 }
175 },
b6de707d 176 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
177 'REQUIRE_JS_PLAYER': False
000c15a4 178 },
179 'ios_embedded': {
000c15a4 180 'INNERTUBE_CONTEXT': {
181 'client': {
182 'clientName': 'IOS_MESSAGES_EXTENSION',
224b5a35 183 'clientVersion': '17.33.2',
18c7683d 184 'deviceModel': 'iPhone14,3',
224b5a35 185 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 186 },
187 },
b6de707d 188 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
189 'REQUIRE_JS_PLAYER': False
000c15a4 190 },
191 'ios_music': {
18c7683d 192 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 193 'INNERTUBE_CONTEXT': {
194 'client': {
195 'clientName': 'IOS_MUSIC',
224b5a35
SF
196 'clientVersion': '5.21',
197 'deviceModel': 'iPhone14,3',
198 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 199 },
200 },
b6de707d 201 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
202 'REQUIRE_JS_PLAYER': False
000c15a4 203 },
e7e94f2a
D
204 'ios_creator': {
205 'INNERTUBE_CONTEXT': {
206 'client': {
207 'clientName': 'IOS_CREATOR',
224b5a35
SF
208 'clientVersion': '22.33.101',
209 'deviceModel': 'iPhone14,3',
210 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
e7e94f2a
D
211 },
212 },
b6de707d 213 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
214 'REQUIRE_JS_PLAYER': False
e7e94f2a 215 },
3619f78d 216 # mweb has 'ultralow' formats
217 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 218 'mweb': {
18c7683d 219 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 220 'INNERTUBE_CONTEXT': {
221 'client': {
222 'clientName': 'MWEB',
a0c830f4 223 'clientVersion': '2.20220801.00.00',
000c15a4 224 }
225 },
226 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
e7870111
D
227 },
228 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
229 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
230 'tv_embedded': {
231 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
232 'INNERTUBE_CONTEXT': {
233 'client': {
234 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
235 'clientVersion': '2.0',
236 },
237 },
238 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
239 },
000c15a4 240}
241
242
e7870111
D
243def _split_innertube_client(client_name):
244 variant, *base = client_name.rsplit('.', 1)
245 if base:
246 return variant, base[0], variant
247 base, *variant = client_name.split('_', 1)
248 return client_name, base, variant[0] if variant else None
249
250
c795c39f
L
def short_client_name(client_name):
    """Build an upper-cased abbreviation of a client name.

    Takes the first element returned by ``_split_innertube_client``, rewrites
    ``'embedscreen'`` to ``'e_s'`` and splits on underscores. The first piece
    is cut to four characters; every further piece contributes its first
    character. The two parts are combined with ``join_nonempty``.
    """
    full_name = _split_innertube_client(client_name)[0]
    head, *tail = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(piece[0] for piece in tail)
    return join_nonempty(head[:4], initials).upper()
254
255
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of ``INNERTUBE_CLIENTS``.

    Mutates the module-level ``INNERTUBE_CLIENTS`` mapping in place:

    * missing API key, host, ``REQUIRE_JS_PLAYER`` and ``hl`` get defaults;
    * each config gets a ``priority`` derived from its base client, lowered
      by 2 for ``embedded`` variants and by 3 for any other variant;
    * every variant-less client additionally gets a deep-copied
      ``<client>_embedscreen`` entry (priority lowered by 3).
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    rank_of = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new "_embedscreen" entries are added to the
    # mapping while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            config.setdefault(key, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank_of(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Copy only after defaults and priority are in place
            screen_config = copy.deepcopy(config)
            screen_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_config['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_config
282
283
284build_innertube_clients()
285
286
@enum.unique
class BadgeType(enum.Enum):
    """Kinds of badge known to the extractor; values are auto-assigned."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
c26f9b99 295
296
de7f3446 297class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 298 """Provide base functions for Youtube extractors"""
e00eb564 299
3462ffa8 300 _RESERVED_NAMES = (
08e29b9f 301 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 302 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 303 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 304 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 305
3619f78d 306 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
307
52efa4b3 308 # _NETRC_MACHINE = 'youtube'
3619f78d 309
b2e8bc1b
JMF
310 # If True it will raise an error if no login info is provided
311 _LOGIN_REQUIRED = False
312
d9190e44
RH
313 _INVIDIOUS_SITES = (
314 # invidious-redirect websites
315 r'(?:www\.)?redirect\.invidious\.io',
316 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 317 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
318 r'(?:www\.)?invidious\.pussthecat\.org',
319 r'(?:www\.)?invidious\.zee\.li',
320 r'(?:www\.)?invidious\.ethibox\.fr',
05799a48
RH
321 r'(?:www\.)?iv\.ggtyler\.dev',
322 r'(?:www\.)?inv\.vern\.i2p',
323 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
324 r'(?:www\.)?inv\.riverside\.rocks',
325 r'(?:www\.)?invidious\.silur\.me',
326 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
327 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
328 r'(?:www\.)?invidious\.slipfox\.xyz',
329 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
330 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
331 r'(?:www\.)?invidious\.tiekoetter\.com',
332 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
333 r'(?:www\.)?invidious\.nerdvpn\.de',
334 r'(?:www\.)?invidious\.weblibre\.org',
335 r'(?:www\.)?inv\.odyssey346\.dev',
336 r'(?:www\.)?invidious\.dhusch\.de',
337 r'(?:www\.)?iv\.melmac\.space',
338 r'(?:www\.)?watch\.thekitty\.zone',
339 r'(?:www\.)?invidious\.privacydev\.net',
340 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
341 r'(?:www\.)?invidious\.drivet\.xyz',
342 r'(?:www\.)?vid\.priv\.au',
343 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
344 r'(?:www\.)?inv\.vern\.cc',
345 r'(?:www\.)?invidious\.esmailelbob\.xyz',
346 r'(?:www\.)?invidious\.sethforprivacy\.com',
347 r'(?:www\.)?yt\.oelrichsgarcia\.de',
348 r'(?:www\.)?yt\.artemislena\.eu',
349 r'(?:www\.)?invidious\.flokinet\.to',
350 r'(?:www\.)?invidious\.baczek\.me',
351 r'(?:www\.)?y\.com\.sb',
352 r'(?:www\.)?invidious\.epicsite\.xyz',
353 r'(?:www\.)?invidious\.lidarshield\.cloud',
354 r'(?:www\.)?yt\.funami\.tech',
d9190e44 355 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
356 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
357 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
358 # youtube-dl invidious instances list
359 r'(?:(?:www|no)\.)?invidiou\.sh',
360 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
361 r'(?:www\.)?invidious\.kabi\.tk',
362 r'(?:www\.)?invidious\.mastodon\.host',
363 r'(?:www\.)?invidious\.zapashcanon\.fr',
364 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
365 r'(?:www\.)?invidious\.tinfoil-hat\.net',
366 r'(?:www\.)?invidious\.himiko\.cloud',
367 r'(?:www\.)?invidious\.reallyancient\.tech',
368 r'(?:www\.)?invidious\.tube',
369 r'(?:www\.)?invidiou\.site',
370 r'(?:www\.)?invidious\.site',
371 r'(?:www\.)?invidious\.xyz',
372 r'(?:www\.)?invidious\.nixnet\.xyz',
373 r'(?:www\.)?invidious\.048596\.xyz',
374 r'(?:www\.)?invidious\.drycat\.fr',
375 r'(?:www\.)?inv\.skyn3t\.in',
376 r'(?:www\.)?tube\.poal\.co',
377 r'(?:www\.)?tube\.connect\.cafe',
378 r'(?:www\.)?vid\.wxzm\.sx',
379 r'(?:www\.)?vid\.mint\.lgbt',
380 r'(?:www\.)?vid\.puffyan\.us',
381 r'(?:www\.)?yewtu\.be',
382 r'(?:www\.)?yt\.elukerio\.org',
383 r'(?:www\.)?yt\.lelux\.fi',
384 r'(?:www\.)?invidious\.ggc-project\.de',
385 r'(?:www\.)?yt\.maisputain\.ovh',
386 r'(?:www\.)?ytprivate\.com',
387 r'(?:www\.)?invidious\.13ad\.de',
388 r'(?:www\.)?invidious\.toot\.koeln',
389 r'(?:www\.)?invidious\.fdn\.fr',
390 r'(?:www\.)?watch\.nettohikari\.com',
391 r'(?:www\.)?invidious\.namazso\.eu',
392 r'(?:www\.)?invidious\.silkky\.cloud',
393 r'(?:www\.)?invidious\.exonip\.de',
394 r'(?:www\.)?invidious\.riverside\.rocks',
395 r'(?:www\.)?invidious\.blamefran\.net',
396 r'(?:www\.)?invidious\.moomoo\.de',
397 r'(?:www\.)?ytb\.trom\.tf',
398 r'(?:www\.)?yt\.cyberhost\.uk',
399 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
400 r'(?:www\.)?qklhadlycap4cnod\.onion',
401 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
402 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
403 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
404 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
405 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
406 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
407 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
408 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
409 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
410 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
411 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
412 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 413 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 414 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 415 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
416 r'(?:www\.)?watch\.whatever\.social',
417 r'(?:www\.)?piped\.garudalinux\.org',
418 r'(?:www\.)?piped\.rivo\.lol',
419 r'(?:www\.)?piped-libre\.kavin\.rocks',
420 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 421 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
422 r'(?:www\.)?il\.ax',
423 r'(?:www\.)?piped\.esmailelbob\.xyz',
424 r'(?:www\.)?piped\.projectsegfau\.lt',
425 r'(?:www\.)?piped\.privacydev\.net',
426 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
427 r'(?:www\.)?piped\.smnz\.de',
428 r'(?:www\.)?piped\.adminforge\.de',
429 r'(?:www\.)?watch\.whatevertinfoil\.de',
430 r'(?:www\.)?piped\.qdi\.fi',
bc87dac7
B
431 r'(?:www\.)?piped\.video',
432 r'(?:www\.)?piped\.aeong\.one',
05799a48
RH
433 r'(?:www\.)?piped\.moomoo\.me',
434 r'(?:www\.)?piped\.chauvet\.pro',
435 r'(?:www\.)?watch\.leptons\.xyz',
436 r'(?:www\.)?pd\.vern\.cc',
437 r'(?:www\.)?piped\.hostux\.net',
438 r'(?:www\.)?piped\.lunar\.icu',
78a78fa7
BG
439 # Hyperpipe instances from https://hyperpipe.codeberg.page/
440 r'(?:www\.)?hyperpipe\.surge\.sh',
441 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
442 r'(?:www\.)?listen\.whatever\.social',
443 r'(?:www\.)?music\.adminforge\.de',
d9190e44
RH
444 )
445
c26f9b99 446 # extracted from account/account_menu ep
447 # XXX: These are the supported YouTube UI and API languages,
448 # which is slightly different from languages supported for translation in YouTube studio
449 _SUPPORTED_LANG_CODES = [
450 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
451 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
452 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
453 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
454 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
455 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
456 ]
457
a057779d 458 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
459
7666b936 460 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
461 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
462
def ucid_or_none(self, ucid):
    """Search ``ucid`` for a whole-string match of ``_YT_CHANNEL_UCID_RE``.

    Delegates to ``_search_regex`` with ``default=None``.
    """
    anchored = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored, ucid, 'UC-id', default=None)
465
def handle_or_none(self, handle):
    """Search ``handle`` for a whole-string match of ``_YT_HANDLE_RE``.

    Delegates to ``_search_regex`` with ``default=None``.
    """
    anchored = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored, handle, '@-handle', default=None)
468
def handle_from_url(self, url):
    """Search ``url`` for an ``@handle`` as its first path component.

    The pattern accepts an optional ``http(s)://[www.]youtube.com`` prefix
    and is passed to ``_search_regex`` with ``default=None``.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(pattern, url, 'channel handle', default=None)
472
def ucid_from_url(self, url):
    """Search ``url`` for a ``UC...`` channel id as its first path component.

    The pattern accepts an optional ``http(s)://[www.]youtube.com`` prefix
    and is passed to ``_search_regex`` with ``default=None``.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(pattern, url, 'channel id', default=None)
476
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError if the configured code is not listed in
    ``_SUPPORTED_LANG_CODES``; warns when a language other than "en" is used.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
494
def _initialize_consent(self):
    """Set a ``CONSENT`` cookie for ``.youtube.com`` unless one is not needed.

    Nothing is done when a ``__Secure-3PSID`` cookie exists or the current
    ``CONSENT`` cookie already contains ``YES``. The id from a ``PENDING+<id>``
    cookie value is reused when found; otherwise a random 3-digit id is used.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return

    pending_id = None
    current = cookies.get('CONSENT')
    if current:
        if 'YES' in current.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{consent_id}')
8d81f3e3 509
def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie with a forced language and timezone.

    Settings from an existing ``PREF`` cookie are kept when it parses as a
    query string; ``hl`` is set to the preferred language (or "en") and
    ``tz`` to "UTC".
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
f3aa3c3f 521
def _real_initialize(self):
    """Run the initialisation steps: PREF cookie, consent cookie, login check."""
    # Order preserved: preferences, then consent, then the login requirement
    self._initialize_pref()
    self._initialize_consent()
    self._check_login_required()
526
def _check_login_required(self):
    """Call ``raise_login_required`` when login is mandatory but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 530
b7c47b74 531 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
532 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 533
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the ``INNERTUBE_CLIENTS`` entry for ``client``."""
    client_config = INNERTUBE_CLIENTS[client]
    # Deep copy so callers cannot modify the shared module-level config
    return copy.deepcopy(client_config)
109dd3b2 536
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for ``client`` in ``INNERTUBE_CLIENTS``."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 539
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the default client config.

    The default config of ``default_client`` is only consulted when the
    lookup on ``ytcfg`` gives a falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
544
def _extract_client_name(self, ytcfg, default_client='web'):
    """Look up the client name (str) in ``ytcfg`` or the default client config.

    Tries ``INNERTUBE_CLIENT_NAME`` first, then the ``clientName`` inside
    ``INNERTUBE_CONTEXT``.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 549
def _extract_client_version(self, ytcfg, default_client='web'):
    """Look up the client version (str) in ``ytcfg`` or the default client config.

    Tries ``INNERTUBE_CLIENT_VERSION`` first, then the ``clientVersion``
    inside ``INNERTUBE_CONTEXT``.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 554
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the ``innertube_host`` configuration argument, then
    ``req_api_hostname``, then the host of ``default_client`` (or "web").
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    return configured or req_api_hostname or self._get_innertube_host(default_client or 'web')
558
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Look up ``INNERTUBE_API_KEY`` (str) in ``ytcfg`` or the default client config."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)
109dd3b2 561
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone enforced.

    The context comes from ``ytcfg`` or, failing that, from the default
    config of ``default_client``. Its ``client`` dict is updated in place.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(
        hl=self._preferred_lang or 'en', timeZone='UTC', utcOffsetMinutes=0)
    return context
569
cf87314d 570 _SAPISID = None
571
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization value for ``origin``.

    The SAPISID value is read from the cookies on first use and remembered
    in ``self._SAPISID`` (``False`` when no usable cookie exists).
    Returns None when no SAPISID is available.
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        source_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (source_cookie and source_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = source_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(digest_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
596
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """Send ``query`` as JSON to ``/youtubei/v1/<ep>`` via ``_download_json``.

    The request body is ``{'context': ...}`` updated with ``query``; the
    context is extracted for ``default_client`` when none is given.
    ``headers`` are merged over the generated API headers. The API key is
    taken from the ``innertube_key`` configuration argument, then
    ``api_key``, then the default client config.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 614
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON following ``_YT_INITIAL_DATA_RE`` via ``_search_json``."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 617
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` among ``data`` that converts with
    ``int_or_none``; None if there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
628
629 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Find the identity token in ``ytcfg`` (``ID_TOKEN``) or, failing that, in ``webpage``."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
639
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 658
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations, tried by get_first on each argument
    locations = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [locations], expected_type=str)
ac56cf38 668
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated (result is cached)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
672
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

    Returns an empty dict when ``webpage`` is empty or parsing gives nothing.
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}
680
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from ``ytcfg``.
    ``X-Goog-AuthUser`` is added when an account syncid was passed or a
    session index is known; ``Authorization`` and ``X-Origin`` are added
    when a SAPISIDHASH header can be generated. The result is passed
    through ``filter_dict``.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    def from_ytcfg(getter):
        return self._ytcfg_get_safe(ytcfg, getter, default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(from_ytcfg(lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'])),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': from_ytcfg(lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent']),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Falls back to 0 when only the account syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 706
def _download_ytcfg(self, client, video_id):
    """Download the config page of a web client and extract its ytcfg.

    Only ``web``, ``web_music`` and ``web_embedded`` have a page to fetch;
    any other client gives an empty dict. The download is non-fatal.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}

    display_name = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
718
2d6659b9 719 @staticmethod
720 def _build_api_continuation_query(continuation, ctp=None):
721 query = {
722 'continuation': continuation
723 }
724 # TODO: Inconsistency with clickTrackingParams.
725 # Currently we have a fixed ctp contained within context (from ytcfg)
726 # and a ctp in root query for continuation.
727 if ctp:
728 query['clickTracking'] = {'clickTrackingParams': ctp}
729 return query
730
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's continuation data.

    Looks at ``continuations[0].nextContinuationData`` and then
    ``continuation.reloadContinuationData``. Returns None when neither is
    a dict or no ``continuation`` token is present.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
2d6659b9 743
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or carries no
    ``continuationCommand.token`` string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 753
@classmethod
def _extract_continuation(cls, renderer):
    """Return a continuation query for ``renderer``.

    The renderer's own continuation data is preferred; otherwise the
    ``continuationItemRenderer`` entries under ``contents``/``items``/``rows``
    are traversed for an endpoint or button command.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 764
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) tuples for the alerts listed in data['alerts'].

    Entries of the alerts list that are not dicts are skipped, and so are alert
    renderers that are not dicts, have no 'type', or have no extractable 'text'.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer comes straight from the API response; a non-dict
            # value would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
777
c0ac49bc 778 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 779 errors, warnings = [], []
109dd3b2 780 for alert_type, alert_message in alerts:
641ad5d8 781 if alert_type.lower() == 'error' and fatal:
109dd3b2 782 errors.append([alert_type, alert_message])
a057779d 783 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 784 warnings.append([alert_type, alert_message])
785
786 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 787 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 788 if errors:
789 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
790
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and report them; extra arguments go to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
793
def _extract_badges(self, badge_list: list):
    """
    Collect the known BadgeType's found in a list of badge renderers.

    A badge is identified by its icon type, then by its style, and as a last
    resort by matching known English words in its label/tooltip.
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(
        badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for badge in renderers:
        icon_type = traverse_obj(badge, ('icon', 'iconType'), expected_type=str)
        badge_type = icon_type_map.get(icon_type) or badge_style_map.get(traverse_obj(badge, 'style'))
        if badge_type:
            found.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        # Only the first matching keyword (in label_map order) counts
        label_matches = [
            label_badge_type for keyword, label_badge_type in label_map.items()
            if keyword in label]
        if label_matches:
            found.append({'type': label_matches[0]})

    return found
845
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether any entry of `badges` has the given `badge_type` as its 'type'."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
849
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at one of the given paths.

    Each candidate is read as its 'simpleText' if present, otherwise as the
    concatenation of the 'text' of its 'runs' (a candidate that is itself a list
    is treated as the runs). With no paths, `data` itself is the candidate.
    @param max_runs: if truthy, only this many leading runs are joined
    @returns the text, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A branching path already yields a list of results; a plain one yields a single object
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            run_limit = max_runs or len(runs)
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
47193e02 871
def _get_count(self, data, *path_list):
    """
    Extract a count from the text found in `data` at `path_list`.

    The text is first handed to parse_count; if that gives nothing, the leading
    run of digits and commas (whitespace removed) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)
879
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries that are not
             dicts or have no valid URL are left out
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            # Entries come straight from the API response; a non-dict entry
            # would otherwise raise AttributeError on .get()
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
903
f3aa3c3f 904 @staticmethod
905 def extract_relative_time(relative_time_text):
906 """
907 Extracts a relative time from string and converts to dt object
2fb35f60 908 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
f3aa3c3f 909 """
2fb35f60 910
911 # XXX: this could be moved to a general function in utils.py
912 # The relative time text strings are roughly the same as what
913 # Javascript's Intl.RelativeTimeFormat function generates.
914 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
915 mobj = re.search(
916 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
917 relative_time_text)
f3aa3c3f 918 if mobj:
f0d785d3 919 start = mobj.group('start')
920 if start:
921 return datetime_from_str(start)
f3aa3c3f 922 try:
f0d785d3 923 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 924 except ValueError:
925 return None
926
c26f9b99 927 def _parse_time_text(self, text):
928 if not text:
929 return
f3aa3c3f 930 dt = self.extract_relative_time(text)
931 timestamp = None
932 if isinstance(dt, datetime.datetime):
933 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 934
935 if timestamp is None:
936 timestamp = (
937 unified_timestamp(text) or unified_timestamp(
938 self._search_regex(
17322130 939 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 940 text.lower(), 'time text', default=None)))
f0d785d3 941
c26f9b99 942 if text and timestamp is None and self._preferred_lang in (None, 'en'):
943 self.report_warning(
944 f'Cannot parse localized time text "{text}"', only_once=True)
945 return timestamp
f3aa3c3f 946
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying through RetryManager.

    A retry is requested for network errors that are not HTTP errors, for HTTP
    errors other than 403 and 429, for alerts mentioning an 'unknown error', and
    for responses in which `check_get_keys` cannot be found. Any other failure is
    passed to _error_or_warning together with `fatal`, and its result is returned.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            cause = e.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            if not isinstance(cause, HTTPError):
                retry.error = e
                continue

            # Peek at the body: a non-HTML error body may be JSON carrying YouTube's own message
            first_bytes = cause.response.read(512)
            if not is_html(first_bytes):
                error_body = self._webpage_read_content(
                    cause.response, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.status not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 998
9297939e 999 @staticmethod
1000 def is_music_url(url):
5b28cef7 1001 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
9297939e 1002
def _extract_video(self, renderer):
    """
    Turn a video renderer into a 'url' result for YoutubeIE.

    Metadata is read from the renderer itself, with the reel player header
    (found under the renderer's navigation endpoint) as a fallback for the title,
    channel id, channel name and time text.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = self.ucid_or_none(
        traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
            expected_type=str, get_all=False)
        or traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # The count is stored as a concurrent viewer count for live and upcoming videos
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The time text is only turned into a timestamp when the 'approximate_date' extractor arg is set
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None
    }
1097
0c148415 1098
360e1ca5 1099class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1100 IE_DESC = 'YouTube'
cb7dfeea 1101 _VALID_URL = r"""(?x)^
c5e8d7af 1102 (
edb53e2d 1103 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1104 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1105 (?:www\.)?deturl\.com/www\.youtube\.com|
1106 (?:www\.)?pwnyoutube\.com|
1107 (?:www\.)?hooktube\.com|
1108 (?:www\.)?yourepeat\.com|
1109 tube\.majestyc\.net|
1110 %(invidious)s|
1111 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1112 (?:.*?\#/)? # handle anchor (#/) redirect urls
1113 (?: # the various things that can precede the ID:
dad2210c 1114 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1115 |(?: # or the v= param in all its forms
f7000f3a 1116 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1117 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1118 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1119 v=
1120 )
f4b05232 1121 ))
cbaed4bb
S
1122 |(?:
1123 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1124 vid\.plus| # or vid.plus/xxxx
1125 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1126 %(invidious)s
cbaed4bb 1127 )/
edb53e2d 1128 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1129 )
c5e8d7af 1130 )? # all until now is optional -> you can pass the naked ID
201c1459 1131 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1132 (?(1).+)? # if we found the ID, everything can follow
9297939e 1133 (?:\#|$)""" % {
d9190e44 1134 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1135 }
7c6eb424 1136 _EMBED_REGEX = [
1137 r'''(?x)
1138 (?:
0ca0f881 1139 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1140 data-video-url=|
1141 <embed[^>]+?src=|
1142 embedSWF\(?:\s*|
1143 <object[^>]+data=|
1144 new\s+SWFObject\(
1145 )
1146 (["\'])
1147 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1148 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1149 \1''',
1150 # https://wordpress.org/plugins/lazy-load-for-videos/
1151 r'''(?xs)
1152 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1153 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1154 ]
6368e2e6 1155 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1156
e40c758c 1157 _PLAYER_INFO_RE = (
cc2db878 1158 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1159 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1160 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1161 )
2c62dc26 1162 _formats = {
c2d3cb4c 1163 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1164 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1165 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1166 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1167 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1168 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1169 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1170 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1171 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1172 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1173 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1174 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1175 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1176 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1177 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1178 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1179 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1180 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1181
1182
1183 # 3D videos
c2d3cb4c 1184 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1185 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1186 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1187 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1188 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1189 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1190 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1191
96fb5605 1192 # Apple HTTP Live Streaming
11f12195 1193 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1194 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1195 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1196 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1197 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1198 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1199 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1200 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1201
1202 # DASH mp4 video
d23028a8
S
1203 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1204 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1205 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1206 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1207 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1208 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1209 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1210 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1211 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1212 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1213 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1214 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1215
f6f1fc92 1216 # Dash mp4 audio
d23028a8
S
1217 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1218 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1219 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1220 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1221 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1222 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1223 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1224
1225 # Dash webm
d23028a8
S
1226 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1227 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1228 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1229 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1230 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1231 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1232 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1233 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1234 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1235 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1236 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1237 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1238 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1239 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1240 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1241 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1242 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1243 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1244 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1245 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1246 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1247 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1248
1249 # Dash webm audio
d23028a8
S
1250 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1251 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1252
0857baad 1253 # Dash webm audio with opus inside
d23028a8
S
1254 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1255 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1256 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1257
ce6b9a2d
PH
1258 # RTMP (unnamed)
1259 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1260
1261 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1262 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1263 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1264 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1265 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1266 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1267 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1268 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1269 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1270 }
29f7c58a 1271 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1272
fd5c4aab
S
1273 _GEO_BYPASS = False
1274
78caa52a 1275 IE_NAME = 'youtube'
2eb88d95
PH
1276 _TESTS = [
1277 {
2d3d2997 1278 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1279 'info_dict': {
1280 'id': 'BaW_jenozKc',
1281 'ext': 'mp4',
3867038a 1282 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
ff9f925b 1283 'channel': 'Philipp Hagemeister',
dd4c4492
S
1284 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1285 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1286 'upload_date': '20121002',
ff9f925b 1287 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1288 'categories': ['Science & Technology'],
3867038a 1289 'tags': ['youtube-dl'],
556dbe7f 1290 'duration': 10,
dbdaaa23 1291 'view_count': int,
3e7c1224 1292 'like_count': int,
ff9f925b 1293 'availability': 'public',
1294 'playable_in_embed': True,
1295 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1296 'live_status': 'not_live',
1297 'age_limit': 0,
7c80519c 1298 'start_time': 1,
297a564b 1299 'end_time': 9,
12a1b225 1300 'comment_count': int,
7666b936 1301 'channel_follower_count': int,
1302 'uploader': 'Philipp Hagemeister',
1303 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1304 'uploader_id': '@PhilippHagemeister',
5caf30db 1305 'heatmap': 'count:100',
2eb88d95 1306 }
0e853ca4 1307 },
fccd3771 1308 {
4bc3a23e
PH
1309 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1310 'note': 'Embed-only video (#1746)',
1311 'info_dict': {
1312 'id': 'yZIXLfi8CZQ',
1313 'ext': 'mp4',
1314 'upload_date': '20120608',
1315 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1316 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
94bfcd23 1317 'age_limit': 18,
545cc85d 1318 },
1319 'skip': 'Private video',
fccd3771 1320 },
11b56058 1321 {
8bdd16b4 1322 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1323 'note': 'Use the first video ID in the URL',
1324 'info_dict': {
1325 'id': 'BaW_jenozKc',
1326 'ext': 'mp4',
3867038a 1327 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976ae3ea 1328 'channel': 'Philipp Hagemeister',
1329 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1330 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1331 'upload_date': '20121002',
976ae3ea 1332 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1333 'categories': ['Science & Technology'],
3867038a 1334 'tags': ['youtube-dl'],
556dbe7f 1335 'duration': 10,
dbdaaa23 1336 'view_count': int,
11b56058 1337 'like_count': int,
976ae3ea 1338 'availability': 'public',
1339 'playable_in_embed': True,
1340 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1341 'live_status': 'not_live',
1342 'age_limit': 0,
12a1b225 1343 'comment_count': int,
7666b936 1344 'channel_follower_count': int,
1345 'uploader': 'Philipp Hagemeister',
1346 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1347 'uploader_id': '@PhilippHagemeister',
14a14335 1348 'heatmap': 'count:100',
34a7de29
S
1349 },
1350 'params': {
1351 'skip_download': True,
1352 },
11b56058 1353 },
dd27fd17 1354 {
2d3d2997 1355 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1356 'note': '256k DASH audio (format 141) via DASH manifest',
1357 'info_dict': {
1358 'id': 'a9LDPn-MO4I',
1359 'ext': 'm4a',
1360 'upload_date': '20121002',
4bc3a23e 1361 'description': '',
4bc3a23e 1362 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1363 },
4bc3a23e
PH
1364 'params': {
1365 'youtube_include_dash_manifest': True,
1366 'format': '141',
4919603f 1367 },
de3c7fe0 1368 'skip': 'format 141 not served anymore',
dd27fd17 1369 },
8bdd16b4 1370 # DASH manifest with encrypted signature
1371 {
1372 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1373 'info_dict': {
1374 'id': 'IB3lcPjvWLA',
1375 'ext': 'm4a',
1376 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1377 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1378 'duration': 244,
8bdd16b4 1379 'upload_date': '20131011',
cc2db878 1380 'abr': 129.495,
976ae3ea 1381 'like_count': int,
1382 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1383 'playable_in_embed': True,
1384 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1385 'view_count': int,
1386 'track': 'The Spark',
1387 'live_status': 'not_live',
1388 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1389 'channel': 'Afrojack',
976ae3ea 1390 'tags': 'count:19',
1391 'availability': 'public',
1392 'categories': ['Music'],
1393 'age_limit': 0,
1394 'alt_title': 'The Spark',
7666b936 1395 'channel_follower_count': int,
1396 'uploader': 'Afrojack',
1397 'uploader_url': 'https://www.youtube.com/@Afrojack',
1398 'uploader_id': '@Afrojack',
8bdd16b4 1399 },
1400 'params': {
1401 'youtube_include_dash_manifest': True,
1402 'format': '141/bestaudio[ext=m4a]',
1403 },
1404 },
65c2fde2 1405 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1406 {
65c2fde2 1407 'note': 'Embed allowed age-gate video',
2d3d2997 1408 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1409 'info_dict': {
1410 'id': 'HtVdAasjOgU',
1411 'ext': 'mp4',
1412 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1413 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1414 'duration': 142,
c522adb1 1415 'upload_date': '20140605',
34952f09 1416 'age_limit': 18,
976ae3ea 1417 'categories': ['Gaming'],
1418 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1419 'availability': 'needs_auth',
1420 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1421 'like_count': int,
1422 'channel': 'The Witcher',
1423 'live_status': 'not_live',
1424 'tags': 'count:17',
1425 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1426 'playable_in_embed': True,
1427 'view_count': int,
7666b936 1428 'channel_follower_count': int,
1429 'uploader': 'The Witcher',
1430 'uploader_url': 'https://www.youtube.com/@thewitcher',
1431 'uploader_id': '@thewitcher',
14a14335 1432 'comment_count': int,
8213ce28 1433 'channel_is_verified': True,
14a14335 1434 'heatmap': 'count:100',
c522adb1
JMF
1435 },
1436 },
65c2fde2 1437 {
1438 'note': 'Age-gate video with embed allowed in public site',
1439 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1440 'info_dict': {
1441 'id': 'HsUATh_Nc2U',
1442 'ext': 'mp4',
1443 'title': 'Godzilla 2 (Official Video)',
1444 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1445 'upload_date': '20200408',
65c2fde2 1446 'age_limit': 18,
976ae3ea 1447 'availability': 'needs_auth',
1448 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
976ae3ea 1449 'channel': 'FlyingKitty',
1450 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1451 'view_count': int,
1452 'categories': ['Entertainment'],
1453 'live_status': 'not_live',
1454 'tags': ['Flyingkitty', 'godzilla 2'],
1455 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1456 'like_count': int,
1457 'duration': 177,
1458 'playable_in_embed': True,
7666b936 1459 'channel_follower_count': int,
1460 'uploader': 'FlyingKitty',
1461 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1462 'uploader_id': '@FlyingKitty900',
5caf30db 1463 'comment_count': int,
8213ce28 1464 'channel_is_verified': True,
65c2fde2 1465 },
1466 },
1467 {
1468 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1469 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1470 'info_dict': {
1471 'id': 'Tq92D6wQ1mg',
1472 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1473 'ext': 'mp4',
17322130 1474 'upload_date': '20191228',
65c2fde2 1475 'description': 'md5:17eccca93a786d51bc67646756894066',
1476 'age_limit': 18,
976ae3ea 1477 'like_count': int,
1478 'availability': 'needs_auth',
976ae3ea 1479 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1480 'view_count': int,
1481 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1482 'channel': 'Projekt Melody',
1483 'live_status': 'not_live',
1484 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1485 'playable_in_embed': True,
1486 'categories': ['Entertainment'],
1487 'duration': 106,
1488 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1489 'comment_count': int,
7666b936 1490 'channel_follower_count': int,
1491 'uploader': 'Projekt Melody',
1492 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1493 'uploader_id': '@ProjektMelody',
65c2fde2 1494 },
1495 },
1496 {
1497 'note': 'Non-Agegated non-embeddable video',
1498 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1499 'info_dict': {
1500 'id': 'MeJVWBSsPAY',
1501 'ext': 'mp4',
1502 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
65c2fde2 1503 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1504 'upload_date': '20130730',
976ae3ea 1505 'track': 'Such mich find mich',
1506 'age_limit': 0,
1507 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1508 'like_count': int,
1509 'playable_in_embed': False,
1510 'creator': 'OOMPH!',
1511 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1512 'view_count': int,
1513 'alt_title': 'Such mich find mich',
1514 'duration': 210,
1515 'channel': 'Herr Lurik',
1516 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1517 'categories': ['Music'],
1518 'availability': 'public',
976ae3ea 1519 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1520 'live_status': 'not_live',
1521 'artist': 'OOMPH!',
7666b936 1522 'channel_follower_count': int,
1523 'uploader': 'Herr Lurik',
1524 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1525 'uploader_id': '@HerrLurik',
65c2fde2 1526 },
1527 },
1528 {
1529 'note': 'Non-bypassable age-gated video',
1530 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1531 'only_matching': True,
1532 },
8bdd16b4 1533 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1534 # YouTube Red ad is not captured for creator
1535 {
1536 'url': '__2ABJjxzNo',
1537 'info_dict': {
1538 'id': '__2ABJjxzNo',
1539 'ext': 'mp4',
1540 'duration': 266,
1541 'upload_date': '20100430',
545cc85d 1542 'creator': 'deadmau5',
1543 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1544 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1545 'alt_title': 'Some Chords',
976ae3ea 1546 'availability': 'public',
1547 'tags': 'count:14',
1548 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1549 'view_count': int,
1550 'live_status': 'not_live',
1551 'channel': 'deadmau5',
1552 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1553 'like_count': int,
1554 'track': 'Some Chords',
1555 'artist': 'deadmau5',
1556 'playable_in_embed': True,
1557 'age_limit': 0,
1558 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1559 'categories': ['Music'],
1560 'album': 'Some Chords',
7666b936 1561 'channel_follower_count': int,
1562 'uploader': 'deadmau5',
1563 'uploader_url': 'https://www.youtube.com/@deadmau5',
1564 'uploader_id': '@deadmau5',
8bdd16b4 1565 },
1566 'expected_warnings': [
1567 'DASH manifest missing',
1568 ]
1569 },
067aa17e 1570 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1571 {
1572 'url': 'lqQg6PlCWgI',
1573 'info_dict': {
1574 'id': 'lqQg6PlCWgI',
1575 'ext': 'mp4',
556dbe7f 1576 'duration': 6085,
90227264 1577 'upload_date': '20150827',
12a1b225 1578 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
cbe2bd91 1579 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1580 'like_count': int,
1581 'release_timestamp': 1343767800,
1582 'playable_in_embed': True,
1583 'categories': ['Sports'],
1584 'release_date': '20120731',
1585 'channel': 'Olympics',
1586 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1587 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1588 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1589 'age_limit': 0,
1590 'availability': 'public',
1591 'live_status': 'was_live',
1592 'view_count': int,
1593 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
7666b936 1594 'channel_follower_count': int,
1595 'uploader': 'Olympics',
1596 'uploader_url': 'https://www.youtube.com/@Olympics',
1597 'uploader_id': '@Olympics',
8213ce28 1598 'channel_is_verified': True,
cbe2bd91
PH
1599 },
1600 'params': {
1601 'skip_download': 'requires avconv',
e52a40ab 1602 }
cbe2bd91 1603 },
6271f1ca
PH
1604 # Non-square pixels
1605 {
1606 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1607 'info_dict': {
1608 'id': '_b-2C3KPAM0',
1609 'ext': 'mp4',
1610 'stretched_ratio': 16 / 9.,
556dbe7f 1611 'duration': 85,
6271f1ca 1612 'upload_date': '20110310',
6271f1ca 1613 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
6271f1ca 1614 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1615 'playable_in_embed': True,
1616 'channel': '孫ᄋᄅ',
1617 'age_limit': 0,
1618 'tags': 'count:11',
1619 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1620 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1621 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1622 'view_count': int,
1623 'categories': ['People & Blogs'],
1624 'like_count': int,
1625 'live_status': 'not_live',
1626 'availability': 'unlisted',
12a1b225 1627 'comment_count': int,
7666b936 1628 'channel_follower_count': int,
1629 'uploader': '孫ᄋᄅ',
1630 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1631 'uploader_id': '@AllenMeow',
6271f1ca 1632 },
06b491eb
S
1633 },
1634 # url_encoded_fmt_stream_map is empty string
1635 {
1636 'url': 'qEJwOuvDf7I',
1637 'info_dict': {
1638 'id': 'qEJwOuvDf7I',
f57b7835 1639 'ext': 'webm',
06b491eb
S
1640 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1641 'description': '',
1642 'upload_date': '20150404',
06b491eb
S
1643 },
1644 'params': {
1645 'skip_download': 'requires avconv',
e323cf3f
S
1646 },
1647 'skip': 'This live event has ended.',
06b491eb 1648 },
067aa17e 1649 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1650 {
1651 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1652 'info_dict': {
1653 'id': 'FIl7x6_3R5Y',
eb6793ba 1654 'ext': 'webm',
da77d856
S
1655 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1656 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1657 'duration': 220,
da77d856 1658 'upload_date': '20150625',
eb6793ba 1659 'formats': 'mincount:31',
da77d856 1660 },
eb6793ba 1661 'skip': 'not actual anymore',
2ee8f5d8 1662 },
8a1a26ce
YCH
1663 # DASH manifest with segment_list
1664 {
1665 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1666 'md5': '8ce563a1d667b599d21064e982ab9e31',
1667 'info_dict': {
1668 'id': 'CsmdDsKjzN8',
1669 'ext': 'mp4',
17ee98e1 1670 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce 1671 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
8a1a26ce
YCH
1672 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1673 },
1674 'params': {
1675 'youtube_include_dash_manifest': True,
1676 'format': '135', # bestvideo
be49068d
S
1677 },
1678 'skip': 'This live event has ended.',
2ee8f5d8 1679 },
cf7e015f 1680 {
6368e2e6 1681 # Multifeed videos (multiple cameras), URL can be of any Camera
7666b936 1682 # TODO: fix multifeed titles
6368e2e6 1683 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1684 'info_dict': {
6368e2e6 1685 'id': 'zaPI8MvL8pg',
1686 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1687 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1688 },
1689 'playlist': [{
1690 'info_dict': {
6368e2e6 1691 'id': 'j5yGuxZ8lLU',
cf7e015f 1692 'ext': 'mp4',
6368e2e6 1693 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
6368e2e6 1694 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1695 'duration': 10120,
1696 'channel_follower_count': int,
1697 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1698 'availability': 'public',
1699 'playable_in_embed': True,
1700 'upload_date': '20131105',
6368e2e6 1701 'categories': ['Gaming'],
1702 'live_status': 'was_live',
1703 'tags': 'count:24',
1704 'release_timestamp': 1383701910,
1705 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1706 'comment_count': int,
1707 'age_limit': 0,
1708 'like_count': int,
1709 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1710 'channel': 'WiiLikeToPlay',
1711 'view_count': int,
1712 'release_date': '20131106',
7666b936 1713 'uploader': 'WiiLikeToPlay',
1714 'uploader_id': '@WLTP',
1715 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1716 },
1717 }, {
1718 'info_dict': {
6368e2e6 1719 'id': 'zaPI8MvL8pg',
cf7e015f 1720 'ext': 'mp4',
6368e2e6 1721 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
6368e2e6 1722 'availability': 'public',
1723 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1724 'channel': 'WiiLikeToPlay',
6368e2e6 1725 'channel_follower_count': int,
1726 'description': 'md5:563ccbc698b39298481ca3c571169519',
1727 'duration': 10108,
1728 'age_limit': 0,
1729 'like_count': int,
1730 'tags': 'count:24',
1731 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
6368e2e6 1732 'release_timestamp': 1383701915,
1733 'comment_count': int,
1734 'upload_date': '20131105',
1735 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1736 'release_date': '20131106',
1737 'playable_in_embed': True,
1738 'live_status': 'was_live',
1739 'categories': ['Gaming'],
1740 'view_count': int,
7666b936 1741 'uploader': 'WiiLikeToPlay',
1742 'uploader_id': '@WLTP',
1743 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1744 },
1745 }, {
1746 'info_dict': {
6368e2e6 1747 'id': 'R7r3vfO7Hao',
cf7e015f 1748 'ext': 'mp4',
6368e2e6 1749 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1750 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1751 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1752 'like_count': int,
1753 'availability': 'public',
1754 'playable_in_embed': True,
1755 'upload_date': '20131105',
1756 'description': 'md5:563ccbc698b39298481ca3c571169519',
6368e2e6 1757 'channel_follower_count': int,
1758 'tags': 'count:24',
1759 'release_date': '20131106',
6368e2e6 1760 'comment_count': int,
1761 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1762 'channel': 'WiiLikeToPlay',
1763 'categories': ['Gaming'],
1764 'release_timestamp': 1383701914,
1765 'live_status': 'was_live',
1766 'age_limit': 0,
1767 'duration': 10128,
1768 'view_count': int,
7666b936 1769 'uploader': 'WiiLikeToPlay',
1770 'uploader_id': '@WLTP',
1771 'uploader_url': 'https://www.youtube.com/@WLTP',
cf7e015f
S
1772 },
1773 }],
6368e2e6 1774 'params': {'skip_download': True},
cbaed4bb 1775 },
f9f49d87 1776 {
067aa17e 1777 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1778 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1779 'info_dict': {
1780 'id': 'gVfLd0zydlo',
1781 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1782 },
1783 'playlist_count': 2,
be49068d 1784 'skip': 'Not multifeed anymore',
f9f49d87 1785 },
cbaed4bb 1786 {
2d3d2997 1787 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1788 'only_matching': True,
0e49d9a6 1789 },
6d4fc66b 1790 {
2d3d2997 1791 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1792 'only_matching': True,
1793 },
0e49d9a6 1794 {
067aa17e 1795 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1796 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1797 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1798 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1799 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1800 'info_dict': {
1801 'id': 'lsguqyKfVQg',
1802 'ext': 'mp4',
1803 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1804 'alt_title': 'Dark Walk',
0e49d9a6 1805 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1806 'duration': 133,
0e49d9a6 1807 'upload_date': '20151119',
11f9be09 1808 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1809 'track': 'Dark Walk',
1810 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1811 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1812 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1813 'categories': ['Film & Animation'],
1814 'view_count': int,
1815 'live_status': 'not_live',
1816 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1817 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1818 'tags': 'count:13',
1819 'availability': 'public',
1820 'channel': 'IronSoulElf',
1821 'playable_in_embed': True,
1822 'like_count': int,
1823 'age_limit': 0,
6c73052c 1824 'channel_follower_count': int
0e49d9a6
LL
1825 },
1826 'params': {
1827 'skip_download': True,
1828 },
1829 },
61f92af1 1830 {
067aa17e 1831 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1832 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1833 'only_matching': True,
1834 },
313dfc45
LL
1835 {
1836 # Video with yt:stretch=17:0
1837 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1838 'info_dict': {
1839 'id': 'Q39EVAstoRM',
1840 'ext': 'mp4',
1841 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1842 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1843 'upload_date': '20151107',
313dfc45
LL
1844 },
1845 'params': {
1846 'skip_download': True,
1847 },
be49068d 1848 'skip': 'This video does not exist.',
313dfc45 1849 },
201c1459 1850 {
1851 # Video with incomplete 'yt:stretch=16:'
1852 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1853 'only_matching': True,
1854 },
7caf9830
S
1855 {
1856 # Video licensed under Creative Commons
1857 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1858 'info_dict': {
1859 'id': 'M4gD1WSo5mA',
1860 'ext': 'mp4',
1861 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1862 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1863 'duration': 721,
17322130 1864 'upload_date': '20150128',
7caf9830 1865 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1866 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1867 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1868 'like_count': int,
1869 'age_limit': 0,
1870 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1871 'channel': 'The Berkman Klein Center for Internet & Society',
1872 'availability': 'public',
1873 'view_count': int,
1874 'categories': ['Education'],
1875 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1876 'live_status': 'not_live',
1877 'playable_in_embed': True,
d5d1df8a 1878 'channel_follower_count': int,
1879 'chapters': list,
7666b936 1880 'uploader': 'The Berkman Klein Center for Internet & Society',
1881 'uploader_id': '@BKCHarvard',
1882 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
7caf9830
S
1883 },
1884 'params': {
1885 'skip_download': True,
1886 },
1887 },
fd050249 1888 {
fd050249
S
1889 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1890 'info_dict': {
1891 'id': 'eQcmzGIKrzg',
1892 'ext': 'mp4',
1893 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1894 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1895 'duration': 4060,
17322130 1896 'upload_date': '20151120',
fd050249 1897 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1898 'playable_in_embed': True,
1899 'tags': 'count:12',
1900 'like_count': int,
1901 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1902 'age_limit': 0,
1903 'availability': 'public',
1904 'categories': ['News & Politics'],
1905 'channel': 'Bernie Sanders',
1906 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1907 'view_count': int,
1908 'live_status': 'not_live',
1909 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1910 'comment_count': int,
d5d1df8a 1911 'channel_follower_count': int,
1912 'chapters': list,
7666b936 1913 'uploader': 'Bernie Sanders',
1914 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1915 'uploader_id': '@BernieSanders',
8213ce28 1916 'channel_is_verified': True,
14a14335 1917 'heatmap': 'count:100',
fd050249
S
1918 },
1919 'params': {
1920 'skip_download': True,
1921 },
1922 },
040ac686
S
1923 {
1924 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1925 'only_matching': True,
7f29cf54
S
1926 },
1927 {
067aa17e 1928 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1929 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1930 'only_matching': True,
6496ccb4
S
1931 },
1932 {
1933 # Rental video preview
1934 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1935 'info_dict': {
1936 'id': 'uGpuVWrhIzE',
1937 'ext': 'mp4',
1938 'title': 'Piku - Trailer',
1939 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1940 'upload_date': '20150811',
6496ccb4
S
1941 'license': 'Standard YouTube License',
1942 },
1943 'params': {
1944 'skip_download': True,
1945 },
eb6793ba 1946 'skip': 'This video is not available.',
022a5d66 1947 },
12afdc2a
S
1948 {
1949 # YouTube Red video with episode data
1950 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1951 'info_dict': {
1952 'id': 'iqKdEhx-dD4',
1953 'ext': 'mp4',
1954 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1955 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1956 'duration': 2085,
12afdc2a 1957 'upload_date': '20170118',
12afdc2a
S
1958 'series': 'Mind Field',
1959 'season_number': 1,
1960 'episode_number': 1,
976ae3ea 1961 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1962 'tags': 'count:12',
1963 'view_count': int,
1964 'availability': 'public',
1965 'age_limit': 0,
1966 'channel': 'Vsauce',
1967 'episode': 'Episode 1',
1968 'categories': ['Entertainment'],
1969 'season': 'Season 1',
1970 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1971 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1972 'like_count': int,
1973 'playable_in_embed': True,
1974 'live_status': 'not_live',
7666b936 1975 'channel_follower_count': int,
1976 'uploader': 'Vsauce',
1977 'uploader_url': 'https://www.youtube.com/@Vsauce',
1978 'uploader_id': '@Vsauce',
14a14335 1979 'comment_count': int,
8213ce28 1980 'channel_is_verified': True,
12afdc2a
S
1981 },
1982 'params': {
1983 'skip_download': True,
1984 },
1985 'expected_warnings': [
1986 'Skipping DASH manifest',
1987 ],
1988 },
c7121fa7
S
1989 {
1990 # The following content has been identified by the YouTube community
1991 # as inappropriate or offensive to some audiences.
1992 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1993 'info_dict': {
1994 'id': '6SJNVb0GnPI',
1995 'ext': 'mp4',
1996 'title': 'Race Differences in Intelligence',
1997 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1998 'duration': 965,
1999 'upload_date': '20140124',
c7121fa7
S
2000 },
2001 'params': {
2002 'skip_download': True,
2003 },
545cc85d 2004 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 2005 },
022a5d66
S
2006 {
2007 # itag 212
2008 'url': '1t24XAntNCY',
2009 'only_matching': True,
fd5c4aab
S
2010 },
2011 {
2012 # geo restricted to JP
2013 'url': 'sJL6WA-aGkQ',
2014 'only_matching': True,
2015 },
cd5a74a2
S
2016 {
2017 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2018 'only_matching': True,
2019 },
bc2ca1bb 2020 {
2021 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2022 'only_matching': True,
2023 },
2024 {
2025 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2026 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2027 'only_matching': True,
2028 },
825cd268
RA
2029 {
2030 # DRM protected
2031 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2032 'only_matching': True,
4fe54c12
S
2033 },
2034 {
2035 # Video with unsupported adaptive stream type formats
2036 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2037 'info_dict': {
2038 'id': 'Z4Vy8R84T1U',
2039 'ext': 'mp4',
2040 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2041 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2042 'duration': 433,
2043 'upload_date': '20130923',
4fe54c12
S
2044 'formats': 'maxcount:10',
2045 },
2046 'params': {
2047 'skip_download': True,
2048 'youtube_include_dash_manifest': False,
2049 },
5429d6a9 2050 'skip': 'not actual anymore',
5caabd3c 2051 },
2052 {
822b9d9c 2053 # YouTube Music auto-generated description
7666b936 2054 # TODO: fix metadata extraction
5caabd3c 2055 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2056 'info_dict': {
2057 'id': 'MgNrAu2pzNs',
2058 'ext': 'mp4',
2059 'title': 'Voyeur Girl',
2060 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2061 'upload_date': '20190312',
5caabd3c 2062 'artist': 'Stephen',
2063 'track': 'Voyeur Girl',
2064 'album': 'it\'s too much love to know my dear',
2065 'release_date': '20190313',
2066 'release_year': 2019,
976ae3ea 2067 'alt_title': 'Voyeur Girl',
2068 'view_count': int,
976ae3ea 2069 'playable_in_embed': True,
2070 'like_count': int,
2071 'categories': ['Music'],
2072 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
7666b936 2073 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2074 'uploader': 'Stephen',
976ae3ea 2075 'availability': 'public',
2076 'creator': 'Stephen',
2077 'duration': 169,
2078 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2079 'age_limit': 0,
2080 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2081 'tags': 'count:11',
2082 'live_status': 'not_live',
6c73052c 2083 'channel_follower_count': int
5caabd3c 2084 },
2085 'params': {
2086 'skip_download': True,
2087 },
2088 },
66b48727
RA
2089 {
2090 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2091 'only_matching': True,
2092 },
011e75e6
S
2093 {
2094 # invalid -> valid video id redirection
2095 'url': 'DJztXj2GPfl',
2096 'info_dict': {
2097 'id': 'DJztXj2GPfk',
2098 'ext': 'mp4',
2099 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2100 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2101 'upload_date': '20090125',
011e75e6
S
2102 'artist': 'Panjabi MC',
2103 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2104 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2105 },
2106 'params': {
2107 'skip_download': True,
2108 },
545cc85d 2109 'skip': 'Video unavailable',
ea74e00b
DP
2110 },
2111 {
2112 # empty description results in an empty string
2113 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2114 'info_dict': {
2115 'id': 'x41yOUIvK2k',
2116 'ext': 'mp4',
2117 'title': 'IMG 3456',
2118 'description': '',
2119 'upload_date': '20170613',
976ae3ea 2120 'view_count': int,
2121 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
976ae3ea 2122 'like_count': int,
2123 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2124 'tags': [],
2125 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2126 'availability': 'public',
2127 'age_limit': 0,
2128 'categories': ['Pets & Animals'],
2129 'duration': 7,
2130 'playable_in_embed': True,
2131 'live_status': 'not_live',
7666b936 2132 'channel': 'l\'Or Vert asbl',
2133 'channel_follower_count': int,
2134 'uploader': 'l\'Or Vert asbl',
2135 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2136 'uploader_id': '@ElevageOrVert',
ea74e00b
DP
2137 },
2138 'params': {
2139 'skip_download': True,
2140 },
2141 },
a0566bbf 2142 {
29f7c58a 2143 # with '};' inside yt initial data (see [1])
2144 # see [2] for an example with '};' inside ytInitialPlayerResponse
2145 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2146 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2147 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2148 'info_dict': {
2149 'id': 'CHqg6qOn4no',
2150 'ext': 'mp4',
2151 'title': 'Part 77 Sort a list of simple types in c#',
2152 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2153 'upload_date': '20130831',
976ae3ea 2154 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2155 'like_count': int,
976ae3ea 2156 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2157 'live_status': 'not_live',
2158 'categories': ['Education'],
2159 'availability': 'public',
2160 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2161 'tags': 'count:12',
2162 'playable_in_embed': True,
2163 'age_limit': 0,
2164 'view_count': int,
2165 'duration': 522,
2166 'channel': 'kudvenkat',
12a1b225 2167 'comment_count': int,
d5d1df8a 2168 'channel_follower_count': int,
2169 'chapters': list,
7666b936 2170 'uploader': 'kudvenkat',
2171 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2172 'uploader_id': '@Csharp-video-tutorialsBlogspot',
8213ce28 2173 'channel_is_verified': True,
14a14335 2174 'heatmap': 'count:100',
a0566bbf 2175 },
2176 'params': {
2177 'skip_download': True,
2178 },
2179 },
29f7c58a 2180 {
2181 # another example of '};' in ytInitialData
2182 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2183 'only_matching': True,
2184 },
2185 {
2186 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2187 'only_matching': True,
2188 },
545cc85d 2189 {
cc2db878 2190 # https://github.com/ytdl-org/youtube-dl/pull/28094
2191 'url': 'OtqTfy26tG0',
2192 'info_dict': {
2193 'id': 'OtqTfy26tG0',
2194 'ext': 'mp4',
2195 'title': 'Burn Out',
2196 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2197 'upload_date': '20141120',
cc2db878 2198 'artist': 'The Cinematic Orchestra',
2199 'track': 'Burn Out',
2200 'album': 'Every Day',
976ae3ea 2201 'like_count': int,
2202 'live_status': 'not_live',
2203 'alt_title': 'Burn Out',
2204 'duration': 614,
2205 'age_limit': 0,
2206 'view_count': int,
2207 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2208 'creator': 'The Cinematic Orchestra',
2209 'channel': 'The Cinematic Orchestra',
2210 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2211 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2212 'availability': 'public',
2213 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2214 'categories': ['Music'],
2215 'playable_in_embed': True,
7666b936 2216 'channel_follower_count': int,
2217 'uploader': 'The Cinematic Orchestra',
2218 'comment_count': int,
cc2db878 2219 },
2220 'params': {
2221 'skip_download': True,
2222 },
545cc85d 2223 },
bc2ca1bb 2224 {
2225 # controversial video, only works with bpctr when authenticated with cookies
2226 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2227 'only_matching': True,
2228 },
a1a7907b 2229 {
2230 # controversial video, requires bpctr/contentCheckOk
2231 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2232 'info_dict': {
2233 'id': 'SZJvDhaSDnc',
2234 'ext': 'mp4',
2235 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2236 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
a1a7907b 2237 'upload_date': '20140716',
976ae3ea 2238 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2239 'duration': 170,
2240 'categories': ['News & Politics'],
976ae3ea 2241 'view_count': int,
2242 'channel': 'CBS Mornings',
2243 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2244 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2245 'age_limit': 18,
2246 'availability': 'needs_auth',
2247 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2248 'like_count': int,
2249 'live_status': 'not_live',
2250 'playable_in_embed': True,
7666b936 2251 'channel_follower_count': int,
2252 'uploader': 'CBS Mornings',
2253 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2254 'uploader_id': '@CBSMornings',
14a14335 2255 'comment_count': int,
8213ce28 2256 'channel_is_verified': True,
a1a7907b 2257 }
2258 },
f7ad7160 2259 {
2260 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2261 'url': 'cBvYw8_A0vQ',
2262 'info_dict': {
2263 'id': 'cBvYw8_A0vQ',
2264 'ext': 'mp4',
2265 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2266 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2267 'upload_date': '20201120',
976ae3ea 2268 'duration': 1456,
2269 'categories': ['Travel & Events'],
2270 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2271 'view_count': int,
2272 'channel': 'Walk around Japan',
2273 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2274 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2275 'age_limit': 0,
2276 'availability': 'public',
2277 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2278 'live_status': 'not_live',
2279 'playable_in_embed': True,
7666b936 2280 'channel_follower_count': int,
2281 'uploader': 'Walk around Japan',
2282 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2283 'uploader_id': '@walkaroundjapan7124',
f7ad7160 2284 },
2285 'params': {
2286 'skip_download': True,
2287 },
0fb983f6 2288 }, {
2289 # Has multiple audio streams
2290 'url': 'WaOKSUlf4TM',
2291 'only_matching': True
9297939e 2292 }, {
2293 # Requires Premium: has format 141 when requested using YTM url
2294 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2295 'only_matching': True
2296 }, {
120916da 2297 # multiple subtitles with same lang_code
2298 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2299 'only_matching': True,
109dd3b2 2300 }, {
2301 # Force use android client fallback
2302 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2303 'info_dict': {
2304 'id': 'YOelRv7fMxY',
11f9be09 2305 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2306 'ext': '3gp',
2307 'upload_date': '20210624',
2308 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
109dd3b2 2309 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2310 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2311 'duration': 596,
2312 'categories': ['Entertainment'],
976ae3ea 2313 'view_count': int,
2314 'channel': 'colinfurze',
2315 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2316 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2317 'age_limit': 0,
2318 'availability': 'public',
2319 'like_count': int,
2320 'live_status': 'not_live',
2321 'playable_in_embed': True,
d5d1df8a 2322 'channel_follower_count': int,
2323 'chapters': list,
7666b936 2324 'uploader': 'colinfurze',
2325 'uploader_url': 'https://www.youtube.com/@colinfurze',
2326 'uploader_id': '@colinfurze',
14a14335 2327 'comment_count': int,
8213ce28 2328 'channel_is_verified': True,
14a14335 2329 'heatmap': 'count:100',
109dd3b2 2330 },
2331 'params': {
2332 'format': '17', # 3gp format available on android
2333 'extractor_args': {'youtube': {'player_client': ['android']}},
2334 },
120916da 2335 },
109dd3b2 2336 {
2337 # Skip download of additional client configs (remix client config in this case)
2338 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2339 'only_matching': True,
2340 'params': {
2341 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2342 },
8fc54b12 2343 }, {
2344 # shorts
2345 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2346 'only_matching': True,
9222c381 2347 }, {
2348 'note': 'Storyboards',
2349 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2350 'info_dict': {
2351 'id': '5KLPxDtMqe8',
2352 'ext': 'mhtml',
2353 'format_id': 'sb0',
2354 'title': 'Your Brain is Plastic',
9222c381 2355 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2356 'upload_date': '20140324',
976ae3ea 2357 'like_count': int,
2358 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2359 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2360 'view_count': int,
2361 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2362 'playable_in_embed': True,
2363 'tags': 'count:12',
976ae3ea 2364 'availability': 'public',
2365 'channel': 'SciShow',
2366 'live_status': 'not_live',
2367 'duration': 248,
2368 'categories': ['Education'],
2369 'age_limit': 0,
d5d1df8a 2370 'channel_follower_count': int,
2371 'chapters': list,
7666b936 2372 'uploader': 'SciShow',
2373 'uploader_url': 'https://www.youtube.com/@SciShow',
2374 'uploader_id': '@SciShow',
14a14335 2375 'comment_count': int,
8213ce28 2376 'channel_is_verified': True,
14a14335 2377 'heatmap': 'count:100',
9222c381 2378 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2379 }, {
2380 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2381 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2382 'info_dict': {
2383 'id': '2NUZ8W2llS4',
2384 'ext': 'mp4',
2385 'title': 'The NP that test your phone performance 🙂',
2386 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
992f9a73 2387 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2388 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2389 'duration': 21,
2390 'view_count': int,
2391 'age_limit': 0,
2392 'categories': ['Gaming'],
2393 'tags': 'count:23',
2394 'playable_in_embed': True,
2395 'live_status': 'not_live',
2396 'upload_date': '20220103',
2397 'like_count': int,
2398 'availability': 'public',
2399 'channel': 'Leon Nguyen',
2400 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2401 'comment_count': int,
7666b936 2402 'channel_follower_count': int,
2403 'uploader': 'Leon Nguyen',
2404 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2405 'uploader_id': '@LeonNguyen',
14a14335 2406 'heatmap': 'count:100',
992f9a73 2407 }
1ff88b7a 2408 }, {
2409 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2410 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2411 'info_dict': {
2412 'id': '2NUZ8W2llS4',
2413 'ext': 'mp4',
2414 'title': 'The NP that test your phone performance 🙂',
2415 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
1ff88b7a 2416 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2417 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2418 'duration': 21,
2419 'view_count': int,
2420 'age_limit': 0,
2421 'categories': ['Gaming'],
2422 'tags': 'count:23',
2423 'playable_in_embed': True,
2424 'live_status': 'not_live',
2425 'upload_date': '20220102',
2426 'like_count': int,
2427 'availability': 'public',
2428 'channel': 'Leon Nguyen',
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2430 'comment_count': int,
7666b936 2431 'channel_follower_count': int,
2432 'uploader': 'Leon Nguyen',
2433 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2434 'uploader_id': '@LeonNguyen',
14a14335 2435 'heatmap': 'count:100',
1ff88b7a 2436 },
2437 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2438 }, {
2439 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2440 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2441 'info_dict': {
2442 'id': 'mzZzzBU6lrM',
2443 'ext': 'mp4',
2444 'title': 'I Met GeorgeNotFound In Real Life...',
7666b936 2445 'description': 'md5:978296ec9783a031738b684d4ebf302d',
992f9a73 2446 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2447 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2448 'duration': 955,
2449 'view_count': int,
2450 'age_limit': 0,
2451 'categories': ['Entertainment'],
2452 'tags': 'count:26',
2453 'playable_in_embed': True,
2454 'live_status': 'not_live',
2455 'release_timestamp': 1641172509,
2456 'release_date': '20220103',
2457 'upload_date': '20220103',
2458 'like_count': int,
2459 'availability': 'public',
2460 'channel': 'Quackity',
2461 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
7666b936 2462 'channel_follower_count': int,
2463 'uploader': 'Quackity',
2464 'uploader_id': '@Quackity',
2465 'uploader_url': 'https://www.youtube.com/@Quackity',
14a14335 2466 'comment_count': int,
8213ce28 2467 'channel_is_verified': True,
14a14335 2468 'heatmap': 'count:100',
992f9a73 2469 }
2470 },
2471 { # continuous livestream. Microformat upload date should be preferred.
2472 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2473 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2474 'info_dict': {
2475 'id': 'kgx4WGK0oNU',
2476 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2477 'ext': 'mp4',
2478 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2479 'availability': 'public',
2480 'age_limit': 0,
2481 'release_timestamp': 1637975704,
2482 'upload_date': '20210619',
2483 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2484 'live_status': 'is_live',
2485 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
992f9a73 2486 'channel': 'Abao in Tokyo',
2487 'channel_follower_count': int,
2488 'release_date': '20211127',
2489 'tags': 'count:39',
2490 'categories': ['People & Blogs'],
2491 'like_count': int,
992f9a73 2492 'view_count': int,
2493 'playable_in_embed': True,
2494 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2495 'concurrent_view_count': int,
7666b936 2496 'uploader': 'Abao in Tokyo',
2497 'uploader_url': 'https://www.youtube.com/@abaointokyo',
2498 'uploader_id': '@abaointokyo',
992f9a73 2499 },
2500 'params': {'skip_download': True}
ee27297f 2501 }, {
2502 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2503 'info_dict': {
2504 'id': 'tjjjtzRLHvA',
2505 'ext': 'mp4',
2506 'title': 'ハッシュタグ無し };if window.ytcsi',
2507 'upload_date': '20220323',
2508 'like_count': int,
2509 'availability': 'unlisted',
7666b936 2510 'channel': 'Lesmiscore',
2511 'thumbnail': r're:^https?://.*\.jpg',
ee27297f 2512 'age_limit': 0,
ee27297f 2513 'categories': ['Music'],
6e634cbe 2514 'view_count': int,
2515 'description': '',
ee27297f 2516 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2517 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2518 'live_status': 'not_live',
2519 'playable_in_embed': True,
2520 'channel_follower_count': int,
2521 'duration': 6,
2522 'tags': [],
7666b936 2523 'uploader_id': '@lesmiscore',
2524 'uploader': 'Lesmiscore',
2525 'uploader_url': 'https://www.youtube.com/@lesmiscore',
6e634cbe 2526 }
c26f9b99 2527 }, {
2528 # Prefer primary title+description language metadata by default
2529 # Do not prefer translated description if primary is empty
2530 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2531 'info_dict': {
2532 'id': 'el3E4MbxRqQ',
2533 'ext': 'mp4',
2534 'title': 'dlp test video 2 - primary sv no desc',
2535 'description': '',
2536 'channel': 'cole-dlp-test-acc',
2537 'tags': [],
2538 'view_count': int,
2539 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2540 'like_count': int,
2541 'playable_in_embed': True,
2542 'availability': 'unlisted',
7666b936 2543 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2544 'age_limit': 0,
2545 'duration': 5,
c26f9b99 2546 'live_status': 'not_live',
2547 'upload_date': '20220908',
2548 'categories': ['People & Blogs'],
c26f9b99 2549 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
7666b936 2550 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2551 'uploader_id': '@coletdjnz',
2552 'uploader': 'cole-dlp-test-acc',
c26f9b99 2553 },
2554 'params': {'skip_download': True}
2555 }, {
2556 # Extractor argument: prefer translated title+description
2557 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2558 'info_dict': {
2559 'id': 'gHKT4uU8Zng',
2560 'ext': 'mp4',
2561 'channel': 'cole-dlp-test-acc',
2562 'tags': [],
2563 'duration': 5,
2564 'live_status': 'not_live',
2565 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2566 'upload_date': '20220728',
c26f9b99 2567 'view_count': int,
2568 'categories': ['People & Blogs'],
7666b936 2569 'thumbnail': r're:^https?://.*\.jpg',
c26f9b99 2570 'title': 'dlp test video title translated (fr)',
2571 'availability': 'public',
c26f9b99 2572 'age_limit': 0,
2573 'description': 'dlp test video description translated (fr)',
2574 'playable_in_embed': True,
2575 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
7666b936 2576 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2577 'uploader_id': '@coletdjnz',
2578 'uploader': 'cole-dlp-test-acc',
c26f9b99 2579 },
2580 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2581 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2582 }, {
2583 'note': '6 channel audio',
2584 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2585 'only_matching': True,
a4894d3e 2586 }, {
2587 'note': 'Multiple HLS formats with same itag',
2588 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2589 'info_dict': {
2590 'id': 'kX3nB4PpJko',
2591 'ext': 'mp4',
2592 'categories': ['Entertainment'],
2593 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
a4894d3e 2594 'live_status': 'not_live',
2595 'duration': 937,
2596 'channel_follower_count': int,
2597 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2598 'title': 'Last To Take Hand Off Jet, Keeps It!',
2599 'channel': 'MrBeast',
2600 'playable_in_embed': True,
2601 'view_count': int,
2602 'upload_date': '20221112',
a4894d3e 2603 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2604 'age_limit': 0,
2605 'availability': 'public',
2606 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2607 'like_count': int,
2608 'tags': [],
7666b936 2609 'uploader': 'MrBeast',
2610 'uploader_url': 'https://www.youtube.com/@MrBeast',
2611 'uploader_id': '@MrBeast',
14a14335 2612 'comment_count': int,
8213ce28 2613 'channel_is_verified': True,
14a14335 2614 'heatmap': 'count:100',
a4894d3e 2615 },
2616 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2617 }, {
2618 'note': 'Audio formats with Dynamic Range Compression',
2619 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2620 'info_dict': {
2621 'id': 'Tq92D6wQ1mg',
7666b936 2622 'ext': 'webm',
9bb85699 2623 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2624 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2625 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2626 'channel_follower_count': int,
2627 'description': 'md5:17eccca93a786d51bc67646756894066',
2628 'upload_date': '20191228',
9bb85699 2629 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2630 'playable_in_embed': True,
2631 'like_count': int,
2632 'categories': ['Entertainment'],
2633 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2634 'age_limit': 18,
2635 'channel': 'Projekt Melody',
9bb85699 2636 'view_count': int,
2637 'availability': 'needs_auth',
2638 'comment_count': int,
2639 'live_status': 'not_live',
9bb85699 2640 'duration': 106,
7666b936 2641 'uploader': 'Projekt Melody',
2642 'uploader_id': '@ProjektMelody',
2643 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
9bb85699 2644 },
2645 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2646 },
2647 {
2648 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2649 'info_dict': {
2650 'id': 'qVv6vCqciTM',
2651 'ext': 'mp4',
2652 'age_limit': 0,
dad2210c 2653 'comment_count': int,
2654 'chapters': 'count:13',
2655 'upload_date': '20221223',
2656 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2657 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
dad2210c 2658 'like_count': int,
2659 'release_date': '20221223',
2660 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2661 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2662 'view_count': int,
2663 'playable_in_embed': True,
2664 'duration': 4438,
2665 'availability': 'public',
2666 'channel_follower_count': int,
2667 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2668 'categories': ['Entertainment'],
2669 'live_status': 'was_live',
2670 'release_timestamp': 1671793345,
2671 'channel': 'さなちゃんねる',
2672 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2673 'uploader': 'さなちゃんねる',
7666b936 2674 'uploader_url': 'https://www.youtube.com/@sana_natori',
2675 'uploader_id': '@sana_natori',
8213ce28 2676 'channel_is_verified': True,
14a14335 2677 'heatmap': 'count:100',
7666b936 2678 },
2679 },
2680 {
2681 # Fallbacks when webpage and web client is unavailable
2682 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2683 'info_dict': {
2684 'id': 'wSSmNUl9Snw',
2685 'ext': 'mp4',
2686 # 'categories': ['Science & Technology'],
2687 'view_count': int,
2688 'chapters': 'count:2',
2689 'channel': 'Scott Manley',
2690 'like_count': int,
2691 'age_limit': 0,
2692 # 'availability': 'public',
2693 'channel_follower_count': int,
2694 'live_status': 'not_live',
2695 'upload_date': '20170831',
2696 'duration': 682,
2697 'tags': 'count:8',
2698 'uploader_url': 'https://www.youtube.com/@scottmanley',
2699 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2700 'uploader': 'Scott Manley',
2701 'uploader_id': '@scottmanley',
2702 'title': 'The Computer Hack That Saved Apollo 14',
2703 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2704 'thumbnail': r're:^https?://.*\.webp',
2705 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2706 'playable_in_embed': True,
14a14335 2707 'comment_count': int,
8213ce28 2708 'channel_is_verified': True,
14a14335 2709 'heatmap': 'count:100',
7666b936 2710 },
2711 'params': {
2712 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
dad2210c 2713 },
2714 },
2eb88d95
PH
2715 ]
2716
# Test cases whose 'url' is a third-party page rather than a YouTube URL.
# NOTE(review): presumably consumed by the generic webpage-embed test harness — confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            # NOTE(review): same hash as the top-level 'md5' above — verify this is intended
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2753
@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected
    (returns False); any other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the local import
    # is presumably deliberate (keeps the method self-contained if its source is
    # copied elsewhere) — confirm before removing.
    from ..utils import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super().suitable(url)
201c1459 2762
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source
    # _player_cache: cache-id tuple -> computed result (or the exception it raised)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2767
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment sources to every format flagged ``is_from_start``.

    Each such format dict is mutated in place: ``is_live`` is set,
    ``fragments`` is filled (a fragment generator factory while live, a
    ``LazyList`` of fragments otherwise), ``protocol`` is overridden for live
    streams, and the ``is_from_start`` marker is removed.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses once more than *delay* seconds have passed."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only one caller at a time may refresh the shared format list
            with manifest_lock:
                refetch_manifest(format_id, delay)

            matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if matched:
                return matched['manifest_url'], matched['manifest_stream_number'], is_live
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # For a stream that is no longer live, hand over a snapshot of the format itself
        orig_fmt = not is_live and fmt.copy()
        gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, orig_fmt)
        if not is_live:
            fmt['fragments'] = LazyList(gen({}))
        else:
            fmt['fragments'] = gen
            fmt['protocol'] = 'http_dash_segments_generator'
        del fmt['is_from_start']
adbc4ec4 2815
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts for a live (or post-live) DASH format.

    @param mpd_feed                 callable ``(format_id, delay)`` returning
                                    ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param manifestless_orig_fmt    falsy, or a format dict used directly instead of
                                    downloading the MPD; when set, only one pass is made
    @param ctx                      dict; ``start`` is read and ``last_error`` is popped
    @yields dicts with the keys ``url`` and ``fragment_count``
    """
    # FETCH_SPAN: seconds between polling passes
    # MAX_DURATION: 432000 seconds == 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below returns it on early exits, which may
    # happen before the main loop has assigned it for the first time
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh manifest state; return ``(should_continue, last_sequence_number)``."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            # A default is required here: a bare next() would raise StopIteration,
            # which surfaces from the enclosing generator as RuntimeError
            fmt_info = next(
                (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
            if not fmt_info:
                # No formats, or the wanted stream is missing: count it as a failed fetch
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many passes in a row produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # Do not update the sequence number here, or part of the stream would be skipped
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow; caught just below to restart the outer loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # the fragment count no longer increases once it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2926
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    The first string found under ``PLAYER_JS_URL`` or
    ``WEB_PLAYER_CONTEXT_CONFIGS -> * -> jsUrl`` is resolved against
    https://www.youtube.com. *webpage* is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
109dd3b2 2934
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version found in the iframe API script.

    Returns None when the download yields nothing or no version is found
    (both only possible when *fatal* is false).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2944
60064c53
PH
2945 def _signature_cache_id(self, example_sig):
2946 """ Return a string representation of a signature """
14f25df2 2947 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2948
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns are tried in order and searching stops at the first hit
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 2958
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for *player_url*, downloading it at most once per player id.

    Returns None when the download produced nothing (only possible when
    *fatal* is false); empty results are not cached.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if code:
        self._code_cache[player_id] = code
    return self._code_cache.get(player_id)
109dd3b2 2969
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a function that rearranges a signature string for this player.

    The rearrangement is stored as a list of source indices under the
    'youtube-sigfuncs' cache section; on a cache miss it is derived by running
    the player's signature function over a probe string and then stored.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{sig_shape}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)
    code = None if cache_spec else self._load_player(video_id, player_url)

    if code:
        sig_func = self._parse_sig_js(code)
        # Probe with chr(0)..chr(n-1) so each output character encodes its source index
        probe = ''.join(chr(i) for i in range(len(example_sig)))
        cache_spec = list(map(ord, sig_func(probe)))
        self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2989
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to *func*, if 'youtube_print_sig_code' is set.

    *func* is probed with a string of distinct characters; the resulting index
    permutation is rendered as a sum of ``s[...]`` index and slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(first, last, stride):
            start_txt = '' if first == 0 else str(first)
            stop = last + stride
            stop_txt = (':%d' % stop) if stop >= 0 else ':'
            stride_txt = '' if stride == 1 else (':%d' % stride)
            return f's[{start_txt}{stop_txt}{stride_txt}]'

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins at prev
                    step, start = delta, prev
                else:
                    yield 's[%d]' % prev
            elif delta != step:
                # The current run ends at prev
                yield _genslice(start, prev, step)
                step = None
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(chr(n) for n in range(len(example_sig)))
    cache_spec = list(map(ord, func(test_string)))
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = ', '.join(str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 3031
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper for it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    # Tried in order; the function name is captured in the named group "sig"
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    return lambda s: initial_function([s])
3052
580ce007 3053 def _cached(self, func, *cache_id):
3054 def inner(*args, **kwargs):
3055 if cache_id not in self._player_cache:
3056 try:
3057 self._player_cache[cache_id] = func(*args, **kwargs)
3058 except ExtractorError as e:
3059 self._player_cache[cache_id] = e
3060 except Exception as e:
3061 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3062
3063 ret = self._player_cache[cache_id]
3064 if isinstance(ret, Exception):
3065 raise ret
3066 return ret
3067 return inner
3068
545cc85d 3069 def _decrypt_signature(self, s, video_id, player_url):
257a2501 3070 """Turn the encrypted s field into a working signature"""
580ce007 3071 extract_sig = self._cached(
3072 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3073 func = extract_sig(video_id, player_url, s)
3074 self._print_sig_code(func, s)
3075 return func(s)
404f611f 3076
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The n function code is obtained through `_extract_n_function_code` and run
    natively first; if that raises JSInterpreter.Exception, the same code is
    executed through PhantomJS when a wrapper can be created.

    Raises ExtractorError when `player_url` is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig_func(jsi, func_code)
        decrypted = nsig_func(s)
    except JSInterpreter.Exception as native_error:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback possible: surface the native failure
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
3110
90a1df30 3111 def _extract_n_function_name(self, jscode):
3112 funcname, idx = self._search_regex(
3113 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3114 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3115 if not idx:
3116 return funcname
3117
3118 return json.loads(js_to_json(self._search_regex(
337734d4 3119 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
90a1df30 3120 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3121
def _extract_n_function_code(self, video_id, player_url):
    """
    Return `(jsi, player_id, func_code)` for the player's n function

    `func_code` is first looked up in the 'youtube-nsig' cache under the
    player id. On a miss the player JS is loaded, the function name is
    resolved and the code is pulled out with a regex, falling back to
    `JSInterpreter.extract_function_code`; the result is then stored in the
    cache. The regex path produces `([argument_name], body)`.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which would
    # otherwise act as an end-of-string anchor and make this pattern never match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3147
3148 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 3149 func = jsi.extract_function_from_code(*func_code)
f6ca640b 3150
580ce007 3151 def extract_nsig(s):
25836db6 3152 try:
3153 ret = func([s])
3154 except JSInterpreter.Exception:
3155 raise
3156 except Exception as e:
3157 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3158
f6ca640b 3159 if ret.startswith('enhanced_except_'):
25836db6 3160 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 3161 return ret
580ce007 3162
3163 return extract_nsig
e0df6211 3164
109dd3b2 3165 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3166 """
3167 Extract signatureTimestamp (sts)
3168 Required to tell API what sig/player version is in use.
3169 """
3170 sts = None
3171 if isinstance(ytcfg, dict):
3172 sts = int_or_none(ytcfg.get('STS'))
3173
3174 if not sts:
3175 # Attempt to extract from player
3176 if player_url is None:
3177 error_msg = 'Cannot extract signature timestamp without player_url.'
3178 if fatal:
3179 raise ExtractorError(error_msg)
3180 self.report_warning(error_msg)
3181 return
404f611f 3182 code = self._load_player(video_id, player_url, fatal=fatal)
3183 if code:
109dd3b2 3184 sts = int_or_none(self._search_regex(
3185 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3186 'JS player signature timestamp', group='sts', fatal=fatal))
3187 return sts
3188
def _mark_watched(self, video_id, player_responses):
    """
    Report the video as watched through the playback tracking URLs

    Two requests are made in order: the playback URL, then the watchtime URL
    (labelled "fully watched"). Processing stops with a warning as soon as
    one of the URLs cannot be found in `player_responses`.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        length_text = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(length_text) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        tracking_url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3229
bfd973ec 3230 @classmethod
3231 def _extract_from_webpage(cls, url, webpage):
3232 # Invidious Instances
3233 # https://github.com/yt-dlp/yt-dlp/issues/195
3234 # https://github.com/iv-org/invidious/pull/1730
3235 mobj = re.search(
3236 r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
3237 webpage)
3238 if mobj:
3239 yield cls.url_result(mobj.group('url'), cls)
3240 raise cls.StopExtraction()
3241
3242 yield from super()._extract_from_webpage(url, webpage)
66c9fa36
S
3243
3244 # lazyYT YouTube embed
bfd973ec 3245 for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
3246 yield cls.url_result(unescapeHTML(id_), cls, id_)
66c9fa36
S
3247
3248 # Wordpress "YouTube Video Importer" plugin
bfd973ec 3249 for m in re.findall(r'''(?x)<div[^>]+
3250 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
3251 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
3252 yield cls.url_result(m[-1], cls, m[-1])
66c9fa36 3253
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`, raising ExtractorError when `get_temp_id` yields nothing"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3260
def _extract_chapters_from_json(self, data, duration):
    """
    Build chapters from the chaptered player bar found in `data`

    Start times are read from `timeRangeStartMillis` (scaled by 1000) and
    titles from `title.simpleText` of each `chapterRenderer`; the actual
    assembly is delegated to `_extract_chapters_helper`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list, start_function=chapter_start, title_function=chapter_title,
        duration=duration)
3275
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro-markers lists of the engagement panels

    Each `macroMarkersListRenderer` contents list is tried in turn; the first
    one for which `_extract_chapters_helper` returns a non-empty result wins.
    Returns an empty list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3288
5caf30db
A
def _extract_heatmap_from_player_overlay(self, data):
    """
    Return the first non-empty heatmap found in the player overlay, else None

    Each heat marker is mapped to `start_time`, `end_time` (start plus
    duration, both divided by 1000) and `value` (the normalized intensity
    score).
    """
    heat_marker_lists = traverse_obj(data, (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
        'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))

    marker_fields = {
        'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
        'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
    }
    for markers in heat_marker_lists:
        heatmap = traverse_obj(markers, (..., 'heatMarkerRenderer', marker_fields))
        if heatmap:
            return heatmap
    return None
3299
a1c5d2ca
M
3300 def _extract_comment(self, comment_renderer, parent=None):
3301 comment_id = comment_renderer.get('commentId')
3302 if not comment_id:
3303 return
fe93e2c4 3304
c35448b7 3305 info = {
3306 'id': comment_id,
3307 'text': self._get_text(comment_renderer, 'contentText'),
3308 'like_count': self._get_count(comment_renderer, 'voteCount'),
3309 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
3310 'author': self._get_text(comment_renderer, 'authorText'),
3311 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
3312 'parent': parent or 'root',
3313 }
fe93e2c4 3314
c26f9b99 3315 # Timestamp is an estimate calculated from the current time and time_text
3316 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3317 timestamp = self._parse_time_text(time_text)
3318
c35448b7 3319 info.update({
3320 # FIXME: non-standard, but we need a way of showing that it is an estimate.
3321 '_time_text': time_text,
3322 'timestamp': timestamp,
3323 })
fe93e2c4 3324
c35448b7 3325 info['author_url'] = urljoin(
3326 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
3327 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
3328 expected_type=str, get_all=False))
a1c5d2ca 3329
c35448b7 3330 author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
3331 if author_is_uploader is not None:
3332 info['author_is_uploader'] = author_is_uploader
3333
3334 comment_abr = traverse_obj(
89bed013 3335 comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
c35448b7 3336 if comment_abr is not None:
3337 info['is_favorited'] = 'creatorHeart' in comment_abr
3338
14a14335 3339 badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
3340 if self._has_badge(badges, BadgeType.VERIFIED):
3341 info['author_is_verified'] = True
c35448b7 3342
3343 is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
3344 if is_pinned:
3345 info['is_pinned'] = True
3346
3347 return info
a1c5d2ca 3348
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator over the comments reachable from `root_continuation_data`

    Called with `parent=None` for the top-level comment section and re-entered
    from `extract_thread` with the parent comment's id for each reply thread.
    The same `tracker` dict is passed down so that counts and seen ids persist
    across the recursion.

    Raises `self.CommentsDisabled` when no comment was produced for the
    top-level section and a message renderer is present in
    `root_continuation_data`.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Record the estimated total and return the continuation of the requested sort order"""
        found_continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimated_count = self._get_count(header, 'countText', 'commentsCount')

            if estimated_count is not None:
                tracker['est_total'] = estimated_count
                self.to_screen(f'Downloading ~{estimated_count} comments')
            # 1 = new, 0 = top
            sort_index = int(get_single_config_arg('comment_sort') != 'top')

            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found_continuation = (
                self._extract_continuation_ep_data(endpoint)
                or self._extract_continuation(menu_item))
            if not found_continuation:
                continue

            sort_text = str_or_none(menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return found_continuation

    def extract_thread(contents):
        """Yield comments and their replies; a bare yield (None) tells the caller to stop"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue
            comment_id = comment['id']
            is_pinned = comment.get('is_pinned')
            if is_pinned:
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id not in tracker['seen_comment_ids']:
                tracker['seen_comment_ids'].add(comment_id)
            else:
                if comment_id in tracker['pinned_comment_ids'] and not is_pinned:
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(reply_entries, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or invalid limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(limit, default=sys.maxsize)
        for limit in self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        skip_check = is_forced_continuation or (
            tracker['est_total'] == 0 and tracker['running_total'] == 0)
        check_get_keys = None
        if not skip_check:
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if not ('incomplete data' in str(e).lower() and parent):
                raise
            if self.get_param('ignoreerrors') in (True, 'only_download'):
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                return
            raise ExtractorError(
                'Incomplete data received for comment reply thread. '
                'Pass --ignore-errors to ignore and allow rest of comments to download.',
                expected=True)

        is_forced_continuation = False
        continuation = None
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only carries the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3532
3533 @staticmethod
3534 def _generate_comment_continuation(video_id):
3535 """
3536 Generates initial comment section continuation token from given video id
3537 """
3538 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3539 return base64.b64encode(token.encode()).decode()
3540
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns a lazy iterator over the comment entries of the
    'comment-item-section' item section, capped at the first value of the
    `max_comments` extractor argument.
    """
    def _real_comment_extract(contents):
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
a1c5d2ca 3551
109dd3b2 3552 @staticmethod
99e9e001 3553 def _get_checkok_params():
3554 return {'contentCheckOk': True, 'racyCheckOk': True}
3555
3556 @classmethod
3557 def _generate_player_context(cls, sts=None):
109dd3b2 3558 context = {
3559 'html5Preference': 'HTML5_PREF_WANTS',
3560 }
3561 if sts is not None:
3562 context['signatureTimestamp'] = sts
3563 return {
3564 'playbackContext': {
3565 'contentPlaybackContext': context
a1a7907b 3566 },
99e9e001 3567 **cls._get_checkok_params()
109dd3b2 3568 }
3569
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age-gated video

    True when `playabilityStatus.desktopLegacyAgeGateReason` is set, or when
    the playability `status` / `reason` contains one of the known age-gate
    markers. The markers are lowercase, so values are lowercased before the
    comparison; this lets uppercase status values (this file compares
    against 'UNPLAYABLE' elsewhere) match. Non-string values are ignored.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')), expected_type=str)
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons
        for expected in AGE_GATE_REASONS)
3581
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3585
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response for `video_id` using `client`

    The signature timestamp is only looked up when a `player_url` is given.
    A `player_params` extractor argument, when set, overrides the `params`
    value otherwise used for the android client. Returns the response, or
    None when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = 'CgIQBg=='

    user_player_params = self._configuration_arg('player_params', [None])[0]
    if user_player_params:
        yt_query['params'] = user_player_params

    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
3611
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into an ordered client list

    Each argument value is either a client name from INNERTUBE_CLIENTS (names
    starting with '_' are not selectable), 'default' or 'all'; anything else
    is skipped with a warning. With no usable value the default clients are
    used. For music URLs the `<client>_music` variant of every requested
    client is appended when such a client exists. Duplicates are removed with
    `orderedSet`.
    """
    default = ['ios', 'android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extensions do not mutate `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that is
        # iterating over that same list mutates it mid-iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3635
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for `video_id` from each requested client

    Clients are processed in the given order; additional fallback clients may
    be queued while processing, depending on the playability of each
    response. Returns `(player_responses, player_url)`.

    Raises the last ExtractorError when no player response at all could be
    collected; otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() hands them out in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                # Save client name for introspection later
                name = short_client_name(client)
                streaming_data = traverse_obj(pr, ('streamingData', {dict})) or {}
                streaming_data[STREAMING_DATA_CLIENT_NAME] = name
                for fmt in traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ..., {dict})):
                    fmt[STREAMING_DATA_CLIENT_NAME] = name
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3723
4d37720a
L
3724 def _needs_live_processing(self, live_status, duration):
3725 if (live_status == 'is_live' and self.get_param('live_from_start')
d949c10c 3726 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
4d37720a
L
3727 return live_status
3728
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate format dicts from the streaming data, followed by a subtitles dict.

    This is a generator: it yields every extracted format (direct https formats
    first, then HLS and DASH manifest formats per streaming data entry) and,
    as its very last item, the merged subtitles dict obtained from the manifests.

    @param streaming_data  Iterable of 'streamingData' dicts of the player responses
    @param video_id        Video id, used for messages and manifest downloads
    @param player_url      URL of the player JS; needed to decrypt signatures/nsig
    @param live_status     Live status string of the video (may be None)
    @param duration        Video duration in seconds (may be None)
    """
    CHUNK_SIZE = 10 << 20
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # Lazily split the file into CHUNK_SIZE byte ranges requested through the `range` query parameter
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: the URL and the encrypted signature are packed in `signatureCipher`
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and deprioritized below
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # `quality` is still None when the format has neither `quality` nor `audioQuality`
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            # Variant of the same format that is downloaded as a sequence of byte-range fragments
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Adjusts the manifest format `f` in place.
        # Returns False if it duplicates an already seen (protocol, language) pair of the itag
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item yielded
    yield subtitles
11f9be09 3978
720c3099 3979 def _extract_storyboard(self, player_responses, duration):
3980 spec = get_first(
3981 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
596379e2 3982 base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
3983 if not base_url:
720c3099 3984 return
720c3099 3985 L = len(spec) - 1
3986 for i, args in enumerate(spec):
3987 args = args.split('#')
3988 counts = list(map(int_or_none, args[:5]))
3989 if len(args) != 8 or not all(counts):
3990 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
3991 continue
3992 width, height, frame_count, cols, rows = counts
3993 N, sigh = args[6:]
3994
3995 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
3996 fragment_count = frame_count / (cols * rows)
3997 fragment_duration = duration / fragment_count
3998 yield {
3999 'format_id': f'sb{i}',
4000 'format_note': 'storyboard',
4001 'ext': 'mhtml',
4002 'protocol': 'mhtml',
4003 'acodec': 'none',
4004 'vcodec': 'none',
4005 'url': url,
4006 'width': width,
4007 'height': height,
45e8a04e 4008 'fps': frame_count / duration,
4009 'rows': rows,
4010 'columns': cols,
720c3099 4011 'fragments': [{
b3edc806 4012 'url': url.replace('$M', str(j)),
720c3099 4013 'duration': min(fragment_duration, duration - (j * fragment_duration)),
4014 } for j in range(math.ceil(fragment_count))],
4015 }
4016
adbc4ec4 4017 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
b6de707d 4018 webpage = None
4019 if 'webpage' not in self._configuration_arg('player_skip'):
50ac0e54 4020 query = {'bpctr': '9999999999', 'has_verified': '1'}
ba06d77a 4021 pp = self._configuration_arg('player_params', [None])[0]
4022 if pp:
4023 query['pp'] = pp
b6de707d 4024 webpage = self._download_webpage(
50ac0e54 4025 webpage_url, video_id, fatal=False, query=query)
11f9be09 4026
4027 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 4028
b6de707d 4029 player_responses, player_url = self._extract_player_responses(
11f9be09 4030 self._get_requested_clients(url, smuggled_data),
50ac0e54 4031 video_id, webpage, master_ytcfg, smuggled_data)
11f9be09 4032
adbc4ec4
THD
4033 return webpage, master_ytcfg, player_responses, player_url
4034
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and collect the formats and subtitles.

    @returns  (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching flag wins; the order encodes the precedence
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields the formats and, as its final item, the subtitles
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    every_format_may_have_drm = True
    for fmt in formats:
        if not fmt.get('has_drm'):
            every_format_may_have_drm = False
            break
    if every_format_may_have_drm:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
4057
4058 def _real_extract(self, url):
4059 url, smuggled_data = unsmuggle_url(url, {})
4060 video_id = self._match_id(url)
4061
4062 base_url = self.http_scheme() + '//www.youtube.com/'
4063 webpage_url = base_url + 'watch?v=' + video_id
4064
4065 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4066
11f9be09 4067 playability_statuses = traverse_obj(
6839ae1f 4068 player_responses, (..., 'playabilityStatus'), expected_type=dict)
11f9be09 4069
4070 trailer_video_id = get_first(
4071 playability_statuses,
4072 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4073 expected_type=str)
4074 if trailer_video_id:
4075 return self.url_result(
4076 trailer_video_id, self.ie_key(), trailer_video_id)
4077
4078 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4079 if webpage else (lambda x: None))
4080
6839ae1f 4081 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
11f9be09 4082 microformats = traverse_obj(
4083 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
6839ae1f 4084 expected_type=dict)
c26f9b99 4085
4086 translated_title = self._get_text(microformats, (..., 'title'))
4087 video_title = (self._preferred_lang and translated_title
4088 or get_first(video_details, 'title') # primary
4089 or translated_title
4090 or search_meta(['og:title', 'twitter:title', 'title']))
4091 translated_description = self._get_text(microformats, (..., 'description'))
4092 original_description = get_first(video_details, 'shortDescription')
4093 video_description = (
4094 self._preferred_lang and translated_description
4095 # If original description is blank, it will be an empty string.
4096 # Do not prefer translated description in this case.
4097 or original_description if original_description is not None else translated_description)
11f9be09 4098
d89257f3 4099 multifeed_metadata_list = get_first(
4100 player_responses,
4101 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4102 expected_type=str)
4103 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4104 if self.get_param('noplaylist'):
11f9be09 4105 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 4106 else:
4107 entries = []
4108 feed_ids = []
4109 for feed in multifeed_metadata_list.split(','):
4110 # Unquote should take place before split on comma (,) since textual
4111 # fields may contain comma as well (see
4112 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 4113 feed_data = urllib.parse.parse_qs(
ac668111 4114 urllib.parse.unquote_plus(feed))
d89257f3 4115
4116 def feed_entry(name):
4117 return try_get(
14f25df2 4118 feed_data, lambda x: x[name][0], str)
d89257f3 4119
4120 feed_id = feed_entry('id')
4121 if not feed_id:
4122 continue
4123 feed_title = feed_entry('title')
4124 title = video_title
4125 if feed_title:
4126 title += ' (%s)' % feed_title
4127 entries.append({
4128 '_type': 'url_transparent',
4129 'ie_key': 'Youtube',
4130 'url': smuggle_url(
4131 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
4132 {'force_singlefeed': True}),
4133 'title': title,
4134 })
4135 feed_ids.append(feed_id)
4136 self.to_screen(
4137 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
4138 % (', '.join(feed_ids), video_id))
4139 return self.playlist_result(
4140 entries, video_id, video_title, video_description)
11f9be09 4141
9da6612b 4142 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4143 or int_or_none(get_first(microformats, 'lengthSeconds'))
4144 or parse_duration(search_meta('duration')) or None)
a1b2d843 4145
4d37720a
L
4146 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4147 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4148 if live_status == 'post_live':
4149 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
bf1317d2 4150
545cc85d 4151 if not formats:
11f9be09 4152 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 4153 self.report_drm(video_id)
11f9be09 4154 pemr = get_first(
4155 playability_statuses,
4156 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4157 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4158 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 4159 if subreason:
545cc85d 4160 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 4161 countries = get_first(microformats, 'availableCountries')
545cc85d 4162 if not countries:
4163 regions_allowed = search_meta('regionsAllowed')
4164 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 4165 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 4166 reason += f'. {subreason}'
545cc85d 4167 if reason:
b7da73eb 4168 self.raise_no_formats(reason, expected=True)
bf1317d2 4169
11f9be09 4170 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 4171 if not keywords and webpage:
4172 keywords = [
4173 unescapeHTML(m.group('content'))
4174 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4175 for keyword in keywords:
4176 if keyword.startswith('yt:stretch='):
201c1459 4177 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4178 if mobj:
4179 # NB: float is intentional for forcing float division
4180 w, h = (float(v) for v in mobj.groups())
4181 if w > 0 and h > 0:
4182 ratio = w / h
4183 for f in formats:
4184 if f.get('vcodec') != 'none':
4185 f['stretched_ratio'] = ratio
4186 break
a709d873 4187 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 4188 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4189 if thumbnail_url:
4190 thumbnails.append({
4191 'url': thumbnail_url,
ff2751ac 4192 })
fccf5021 4193 original_thumbnails = thumbnails.copy()
4194
0ba692ac 4195 # The best resolution thumbnails sometimes do not appear in the webpage
bfec31be 4196 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 4197 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 4198 thumbnail_names = [
962ffcf8 4199 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 4200 # in resolution, these are not the custom thumbnail. So de-prioritize them
4201 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4202 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 4203 ]
cca80fe6 4204 n_thumbnail_names = len(thumbnail_names)
0ba692ac 4205 thumbnails.extend({
4206 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4207 video_id=video_id, name=name, ext=ext,
4d37720a 4208 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
cca80fe6 4209 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 4210 for thumb in thumbnails:
cca80fe6 4211 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 4212 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 4213 self._remove_duplicate_formats(thumbnails)
fccf5021 4214 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 4215
7ea65411 4216 category = get_first(microformats, 'category') or search_meta('genre')
7666b936 4217 channel_id = self.ucid_or_none(str_or_none(
7ea65411 4218 get_first(video_details, 'channelId')
4219 or get_first(microformats, 'externalChannelId')
7666b936 4220 or search_meta('channelId')))
7ea65411 4221 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4222
adbc4ec4
THD
4223 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4224 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4225 if not duration and live_end_time and live_start_time:
4226 duration = live_end_time - live_start_time
4227
4d37720a
L
4228 needs_live_processing = self._needs_live_processing(live_status, duration)
4229
4230 def is_bad_format(fmt):
4231 if needs_live_processing and not fmt.get('is_from_start'):
4232 return True
4233 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4234 and fmt.get('protocol') == 'http_dash_segments'):
4235 return True
4236
4237 for fmt in filter(is_bad_format, formats):
4238 fmt['preference'] = (fmt.get('preference') or -1) - 10
d949c10c 4239 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
4d37720a
L
4240
4241 if needs_live_processing:
4242 self._prepare_live_from_start_formats(
4243 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
7ea65411 4244
720c3099 4245 formats.extend(self._extract_storyboard(player_responses, duration))
4246
7666b936 4247 channel_handle = self.handle_from_url(owner_profile_url)
4248
545cc85d 4249 info = {
4250 'id': video_id,
39ca3b5c 4251 'title': video_title,
545cc85d 4252 'formats': formats,
4253 'thumbnails': thumbnails,
fccf5021 4254 # The best thumbnail that we are sure exists. Prevents unnecessary
4255 # URL checking if the user doesn't care about getting the best possible thumbnail
4256 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 4257 'description': video_description,
545cc85d 4258 'channel_id': channel_id,
7666b936 4259 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
545cc85d 4260 'duration': duration,
4261 'view_count': int_or_none(
11f9be09 4262 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 4263 or search_meta('interactionCount')),
11f9be09 4264 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 4265 'age_limit': 18 if (
11f9be09 4266 get_first(microformats, 'isFamilySafe') is False
545cc85d 4267 or search_meta('isFamilyFriendly') == 'false'
4268 or search_meta('og:restrictions:age') == '18+') else 0,
4269 'webpage_url': webpage_url,
4270 'categories': [category] if category else None,
4271 'tags': keywords,
11f9be09 4272 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4d37720a 4273 'live_status': live_status,
adbc4ec4 4274 'release_timestamp': live_start_time,
9f14daf2 4275 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
4276 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
545cc85d 4277 }
b477fc13 4278
c646d76f 4279 subtitles = {}
3944e7af 4280 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 4281 if pctr:
ecdc9049 4282 def get_lang_code(track):
4283 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4284 or track.get('languageCode'))
4285
4286 # Converted into dicts to remove duplicates
4287 captions = {
4288 get_lang_code(sub): sub
6839ae1f 4289 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
ecdc9049 4290 translation_languages = {
4291 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
6839ae1f 4292 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
ecdc9049 4293
774d79cc 4294 def process_language(container, base_url, lang_code, sub_name, query):
120916da 4295 lang_subs = container.setdefault(lang_code, [])
545cc85d 4296 for fmt in self._SUBTITLE_FORMATS:
4297 query.update({
4298 'fmt': fmt,
4299 })
4300 lang_subs.append({
4301 'ext': fmt,
60f393e4 4302 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 4303 'name': sub_name,
545cc85d 4304 })
7e72694b 4305
07b47084 4306 # NB: Constructing the full subtitle dictionary is slow
4307 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4308 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 4309 for lang_code, caption_track in captions.items():
4310 base_url = caption_track.get('baseUrl')
1235d333 4311 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 4312 if not base_url:
4313 continue
ecdc9049 4314 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 4315 if caption_track.get('kind') != 'asr':
545cc85d 4316 if not lang_code:
4317 continue
4318 process_language(
ecdc9049 4319 subtitles, base_url, lang_code, lang_name, {})
4320 if not caption_track.get('isTranslatable'):
4321 continue
3944e7af 4322 for trans_code, trans_name in translation_languages.items():
4323 if not trans_code:
545cc85d 4324 continue
1235d333 4325 orig_trans_code = trans_code
71eb82d1 4326 if caption_track.get('kind') != 'asr' and trans_code != 'und':
07b47084 4327 if not get_translated_subs:
18e49408 4328 continue
ecdc9049 4329 trans_code += f'-{lang_code}'
a70635b8 4330 trans_name += format_field(lang_name, None, ' from %s')
1235d333 4331 if lang_code == f'a-{orig_trans_code}':
ff9b0e07 4332 # Set audio language based on original subtitles
4333 for f in formats:
4334 if f.get('acodec') != 'none' and not f.get('language'):
4335 f['language'] = orig_trans_code
4336 # Add an "-orig" label to the original language so that it can be distinguished.
4337 # The subs are returned without "-orig" as well for compatibility
0c8d9e5f 4338 process_language(
d49669ac 4339 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4340 # Setting tlang=lang returns damaged subtitles.
d49669ac 4341 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 4342 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 4343
4344 info['automatic_captions'] = automatic_captions
4345 info['subtitles'] = subtitles
7e72694b 4346
14f25df2 4347 parsed_url = urllib.parse.urlparse(url)
545cc85d 4348 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 4349 query = urllib.parse.parse_qs(component)
545cc85d 4350 for k, v in query.items():
4351 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4352 d_k += '_time'
4353 if d_k not in info and k in s_ks:
4354 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
4355
4356 # Youtube Music Auto-generated description
71dc18fa
BT
4357 if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
4358 # XXX: Causes catastrophic backtracking if description has "·"
4359 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4360 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4361 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
1890fc63 4362 mobj = re.search(
4363 r'''(?xs)
71dc18fa
BT
4364 (?=(?P<track>[^\n·]+))(?P=track)·
4365 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4366 (?=(?P<album>[^\n]+))(?P=album)\n
1890fc63 4367 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4368 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
71dc18fa
BT
4369 (.+?\nArtist\s*:\s*
4370 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4371 )?.+\nAuto-generated\ by\ YouTube\.\s*$
1890fc63 4372 ''', video_description)
822b9d9c 4373 if mobj:
822b9d9c
RA
4374 release_year = mobj.group('release_year')
4375 release_date = mobj.group('release_date')
4376 if release_date:
4377 release_date = release_date.replace('-', '')
4378 if not release_year:
545cc85d 4379 release_year = release_date[:4]
4380 info.update({
4381 'album': mobj.group('album'.strip()),
4382 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4383 'track': mobj.group('track').strip(),
4384 'release_date': release_date,
cc2db878 4385 'release_year': int_or_none(release_year),
545cc85d 4386 })
7e72694b 4387
545cc85d 4388 initial_data = None
4389 if webpage:
56ba69e4 4390 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
607510b9 4391 if not traverse_obj(initial_data, 'contents'):
4392 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4393 initial_data = None
545cc85d 4394 if not initial_data:
99e9e001 4395 query = {'videoId': video_id}
4396 query.update(self._get_checkok_params())
109dd3b2 4397 initial_data = self._extract_response(
4398 item_id=video_id, ep='next', fatal=False,
607510b9 4399 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
99e9e001 4400 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4401 note='Downloading initial data API JSON')
545cc85d 4402
0df111a3 4403 info['comment_count'] = traverse_obj(initial_data, (
4404 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
071670cb 4405 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
0df111a3 4406 ), (
4407 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
071670cb
ND
4408 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
4409 ), expected_type=self._get_count, get_all=False)
0df111a3 4410
19a03940 4411 try: # This will error if there is no livechat
c60ee3a2 4412 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4413 except (KeyError, IndexError, TypeError):
4414 pass
4415 else:
ecdc9049 4416 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4417 # url is needed to set cookies
4418 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4419 'video_id': video_id,
4420 'ext': 'json',
4d37720a
L
4421 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4422 else 'youtube_live_chat_replay'),
c60ee3a2 4423 }]
545cc85d 4424
4425 if initial_data:
7c365c21 4426 info['chapters'] = (
4427 self._extract_chapters_from_json(initial_data, duration)
4428 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4429 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4430 or None)
545cc85d 4431
5caf30db
A
4432 info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
4433
17322130 4434 contents = traverse_obj(
4435 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4436 expected_type=list, default=[])
4437
4438 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4439 if vpir:
4440 stl = vpir.get('superTitleLink')
4441 if stl:
4442 stl = self._get_text(stl)
4443 if try_get(
4444 vpir,
4445 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4446 info['location'] = stl
4447 else:
affc4fef 4448 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4449 if mobj:
545cc85d 4450 info.update({
17322130 4451 'series': mobj.group(1),
4452 'season_number': int(mobj.group(2)),
4453 'episode_number': int(mobj.group(3)),
545cc85d 4454 })
17322130 4455 for tlb in (try_get(
4456 vpir,
4457 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4458 list) or []):
3ffb2f5b 4459 tbrs = variadic(
4460 traverse_obj(
6839ae1f
SS
4461 tlb, ('toggleButtonRenderer', ...),
4462 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
3ffb2f5b 4463 for tbr in tbrs:
4464 for getter, regex in [(
4465 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4466 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4467 lambda x: x['accessibility'],
4468 lambda x: x['accessibilityData']['accessibilityData'],
4469 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4470 label = (try_get(tbr, getter, dict) or {}).get('label')
4471 if label:
4472 mobj = re.match(regex, label)
4473 if mobj:
4474 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4475 break
17322130 4476 sbr_tooltip = try_get(
4477 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4478 if sbr_tooltip:
4479 like_count, dislike_count = sbr_tooltip.split(' / ')
4480 info.update({
4481 'like_count': str_to_int(like_count),
4482 'dislike_count': str_to_int(dislike_count),
4483 })
867c66ff
M
4484 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4485 if vcr:
4486 vc = self._get_count(vcr, 'viewCount')
4487 # Upcoming premieres with waiting count are treated as live here
4488 if vcr.get('isLive'):
4489 info['concurrent_view_count'] = vc
4490 elif info.get('view_count') is None:
4491 info['view_count'] = vc
4492
17322130 4493 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4494 if vsir:
4495 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4496 info.update({
4497 'channel': self._get_text(vor, 'title'),
4498 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4499
7666b936 4500 if not channel_handle:
4501 channel_handle = self.handle_from_url(
4502 traverse_obj(vor, (
4503 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4504 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4505 {str}), get_all=False))
4506
17322130 4507 rows = try_get(
4508 vsir,
4509 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4510 list) or []
4511 multiple_songs = False
4512 for row in rows:
4513 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4514 multiple_songs = True
4515 break
4516 for row in rows:
4517 mrr = row.get('metadataRowRenderer') or {}
4518 mrr_title = mrr.get('title')
4519 if not mrr_title:
4520 continue
4521 mrr_title = self._get_text(mrr, 'title')
4522 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4523 if mrr_title == 'License':
4524 info['license'] = mrr_contents_text
4525 elif not multiple_songs:
4526 if mrr_title == 'Album':
4527 info['album'] = mrr_contents_text
4528 elif mrr_title == 'Artist':
4529 info['artist'] = mrr_contents_text
4530 elif mrr_title == 'Song':
4531 info['track'] = mrr_contents_text
8213ce28 4532 owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
4533 if self._has_badge(owner_badges, BadgeType.VERIFIED):
4534 info['channel_is_verified'] = True
545cc85d 4535
7666b936 4536 info.update({
4537 'uploader': info.get('channel'),
4538 'uploader_id': channel_handle,
4539 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
4540 })
17322130 4541 # The upload date for scheduled, live and past live streams / premieres in microformats
4542 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4543 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 4544 upload_date = (
4545 unified_strdate(get_first(microformats, 'uploadDate'))
4546 or unified_strdate(search_meta('uploadDate')))
1ff88b7a 4547 if not upload_date or (
4d37720a 4548 live_status in ('not_live', None)
1ff88b7a 4549 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4550 ):
c26f9b99 4551 upload_date = strftime_or_none(
ad54c913 4552 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
17322130 4553 info['upload_date'] = upload_date
992f9a73 4554
545cc85d 4555 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4556 v = info.get(s_k)
4557 if v:
4558 info[d_k] = v
b84071c0 4559
14a14335 4560 badges = self._extract_badges(traverse_obj(vpir, 'badges'))
c26f9b99 4561
4562 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4563 or get_first(video_details, 'isPrivate', expected_type=bool))
4564
4565 info['availability'] = (
4566 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4567 else self._availability(
4568 is_private=is_private,
4569 needs_premium=(
4570 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4571 or False if initial_data and is_private is not None else None),
4572 needs_subscription=(
4573 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4574 or False if initial_data and is_private is not None else None),
4575 needs_auth=info['age_limit'] >= 18,
4576 is_unlisted=None if is_private is None else (
4577 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4578 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4579
a2160aa4 4580 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4581
11f9be09 4582 self.mark_watched(video_id, player_responses)
d77ab8e2 4583
545cc85d 4584 return info
c5e8d7af 4585
a61fd4cf 4586
a6213a49 4587class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate ``func(self, url, smuggled_data)`` so it can be called as ``wrapper(self, url)``.

    The wrapper unsmuggles the data carried by ``url``, sets the
    ``is_music_url`` flag when ``self.is_music_url(url)`` is true, calls
    ``func`` and finally smuggles whatever data remains into the URL of the
    returned result and of each of its ``entries``.
    """

    def _smuggle(info, smuggled_data):
        # Only results that redirect to another URL can carry smuggled data
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                    # The flag is expressed through the host name, so it no longer needs smuggling
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Entries are processed lazily; each gets its own copy because _smuggle mutates the data
            info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
        return info_dict

    return wrapper
4614
8bdd16b4 4615 @staticmethod
cd7c66cf 4616 def _extract_basic_item_renderer(item):
4617 # Modified from _extract_grid_item_renderer
201c1459 4618 known_basic_renderers = (
a17526e4 4619 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4620 )
4621 for key, renderer in item.items():
201c1459 4622 if not isinstance(renderer, dict):
cd7c66cf 4623 continue
201c1459 4624 elif key in known_basic_renderers:
4625 return renderer
4626 elif key.startswith('grid') and key.endswith('Renderer'):
4627 return renderer
8bdd16b4 4628
def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result dict for the channel described by *renderer*.

    *renderer* must contain a ``channelId`` key.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    handle_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(handle_url)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    # In search channel renderers YouTube sets videoCountText to the subscriber text
    # (see the handle fallback above). However, in feed/channels subscriberCountText
    # is set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so it is only trusted
    # when subscriberCountText itself yields a count
    has_subscriber_count = self._get_count(renderer, 'subscriberCountText') is not None
    playlist_count = self._get_count(renderer, 'videoCountText') if has_subscriber_count else None

    description = self._get_text(renderer, 'descriptionSnippet')
    is_verified = self._has_badge(
        self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED)

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': True if is_verified else None,
    }
4666
8bdd16b4 4667 def _grid_entries(self, grid_renderer):
4668 for item in grid_renderer['items']:
4669 if not isinstance(item, dict):
39b62db1 4670 continue
cd7c66cf 4671 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4672 if not isinstance(renderer, dict):
4673 continue
052e1350 4674 title = self._get_text(renderer, 'title')
fe93e2c4 4675
8bdd16b4 4676 # playlist
4677 playlist_id = renderer.get('playlistId')
4678 if playlist_id:
4679 yield self.url_result(
4680 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4681 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4682 video_title=title)
201c1459 4683 continue
8bdd16b4 4684 # video
4685 video_id = renderer.get('videoId')
4686 if video_id:
4687 yield self._extract_video(renderer)
201c1459 4688 continue
8bdd16b4 4689 # channel
4690 channel_id = renderer.get('channelId')
4691 if channel_id:
c7335551 4692 yield self._extract_channel_renderer(renderer)
201c1459 4693 continue
4694 # generic endpoint URL support
4695 ep_url = urljoin('https://www.youtube.com/', try_get(
4696 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4697 str))
201c1459 4698 if ep_url:
4699 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4700 if ie.suitable(ep_url):
4701 yield self.url_result(
4702 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4703 break
8bdd16b4 4704
def _music_reponsive_list_entry(self, renderer):
    """Return a url result for a music list item, or None if no target id is found.

    The targets are tried in order: the item's own video id, the playlist
    of its watch endpoint (with the endpoint's video id if present), and
    finally the browse id of its browse endpoint.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4725
3d3dddc9 4726 def _shelf_entries_from_content(self, shelf_renderer):
4727 content = shelf_renderer.get('content')
4728 if not isinstance(content, dict):
8bdd16b4 4729 return
cd7c66cf 4730 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4731 if renderer:
4732 # TODO: add support for nested playlists so each shelf is processed
4733 # as separate playlist
4734 # TODO: this includes only first N items
86e5f3ed 4735 yield from self._grid_entries(renderer)
3d3dddc9 4736 renderer = content.get('horizontalListRenderer')
4737 if renderer:
4738 # TODO
4739 pass
8bdd16b4 4740
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's endpoint (if any), then the entries of its content.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4756
8bdd16b4 4757 def _playlist_entries(self, video_list_renderer):
4758 for content in video_list_renderer['contents']:
4759 if not isinstance(content, dict):
4760 continue
4761 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4762 if not isinstance(renderer, dict):
4763 continue
4764 video_id = renderer.get('videoId')
4765 if not video_id:
4766 continue
4767 yield self._extract_video(renderer)
07aeced6 4768
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry for a rich item: its video if it has one, otherwise its playlist."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif renderer.get('playlistId'):
        playlist_id = renderer['playlistId']
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
3462ffa8 4784
8bdd16b4 4785 def _video_entry(self, video_renderer):
4786 video_id = video_renderer.get('videoId')
4787 if video_id:
4788 return self._extract_video(video_renderer)
dacb3a86 4789
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the tile's tap-command URL, or None if no URL can be built."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4796
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    linked from the post text other than the attached one.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4832
8bdd16b4 4833 def _post_thread_continuation_entries(self, post_thread_continuation):
4834 contents = post_thread_continuation.get('contents')
4835 if not isinstance(contents, list):
4836 return
4837 for content in contents:
4838 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4839 if isinstance(renderer, dict):
4840 yield from self._post_thread_entries(renderer)
8bdd16b4 4841 continue
6b0b0a28 4842 renderer = content.get('videoRenderer')
4843 if isinstance(renderer, dict):
4844 yield self._video_entry(renderer)
07aeced6 4845
39ed931e 4846 r''' # unused
4847 def _rich_grid_entries(self, contents):
4848 for content in contents:
4849 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4850 if video_renderer:
4851 entry = self._video_entry(video_renderer)
4852 if entry:
4853 yield entry
4854 '''
52efa4b3 4855
def _report_history_entries(self, renderer):
    """Yield a video url result for every link found in the cells of a report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4862
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    The continuation found while extracting, if any, is reported back to
    the caller through *continuation_list*.
    """
    # Handlers for the renderers that may appear inside a section's contents.
    # Each one takes the renderer and returns an iterable of entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: only rich items and the report history table are handled
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue

            # Only the first known renderer of each item is processed
            known_key = next((key for key in section_item if key in section_handlers), None)
            if known_key is not None:
                renderer = section_item[known_key]
                yield from (entry for entry in section_handlers[known_key](renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
4916
4917 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4918 continuation_list = [None]
4919 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4920 tab_content = try_get(tab, lambda x: x['content'], dict)
4921 if not tab_content:
4922 return
3462ffa8 4923 parent_renderer = (
29f7c58a 4924 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4925 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4926 yield from extract_entries(parent_renderer)
3462ffa8 4927 continuation = continuation_list[0]
1ba6fe9d 4928 seen_continuations = set()
8bdd16b4 4929 for page_num in itertools.count(1):
4930 if not continuation:
4931 break
1ba6fe9d 4932 continuation_token = continuation.get('continuation')
4933 if continuation_token is not None and continuation_token in seen_continuations:
4934 self.write_debug('Detected YouTube feed looping - assuming end of feed.')
4935 break
4936 seen_continuations.add(continuation_token)
99e9e001 4937 headers = self.generate_api_headers(
4938 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4939 response = self._extract_response(
86e5f3ed 4940 item_id=f'{item_id} page {page_num}',
fe93e2c4 4941 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4942 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4943
4944 if not response:
8bdd16b4 4945 break
ac56cf38 4946 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4947 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4948 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4949
a1b535bd 4950 known_renderers = {
e4b98809 4951 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4952 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4953 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4954 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4955 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4956 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4957 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
0a5095fe 4958 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
4959 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
1fb53b94 4960 'playlistVideoListContinuation': (self._playlist_entries, None),
4961 'gridContinuation': (self._grid_entries, None),
4962 'itemSectionContinuation': (self._post_thread_continuation_entries, None),
4963 'sectionListContinuation': (extract_entries, None), # for feeds
a1b535bd 4964 }
1fb53b94 4965
4966 continuation_items = traverse_obj(response, (
4967 ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
4968 'appendContinuationItemsAction', 'continuationItems'
4969 ), 'continuationContents', get_all=False)
4970 continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
4971
a1b535bd 4972 video_items_renderer = None
1fb53b94 4973 for key in continuation_item.keys():
a1b535bd 4974 if key not in known_renderers:
8bdd16b4 4975 continue
1fb53b94 4976 func, parent_key = known_renderers[key]
4977 video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
9ba5705a 4978 continuation_list = [None]
1fb53b94 4979 yield from func(video_items_renderer)
9ba5705a 4980 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
1fb53b94 4981
4982 if not video_items_renderer:
a1b535bd 4983 break
9558dcec 4984
8bdd16b4 4985 @staticmethod
7c219ea6 4986 def _extract_selected_tab(tabs, fatal=True):
86973308
M
4987 for tab_renderer in tabs:
4988 if tab_renderer.get('selected'):
4989 return tab_renderer
4990 if fatal:
4991 raise ExtractorError('Unable to find selected tab')
4992
@staticmethod
def _extract_tab_renderers(response):
    """Return the ``tabRenderer`` / ``expandableTabRenderer`` dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
b82f815f 4997
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result made of the selected tab's entries and the page metadata.

    The selected tab's ``title`` and ``expandedText``, when present, are
    appended to the playlist title.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
39ed931e 5011
bd7e919a 5012 def _extract_metadata_from_tabs(self, item_id, data):
5013 info = {'id': item_id}
5014
5015 metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
6141346d 5016 if metadata_renderer:
7666b936 5017 channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
4823ec9f 5018 ('channelUrl', {self.ucid_from_url}))
bd7e919a 5019 info.update({
7666b936 5020 'channel': metadata_renderer.get('title'),
5021 'channel_id': channel_id,
bd7e919a 5022 })
7666b936 5023 if info['channel_id']:
5024 info['id'] = info['channel_id']
bd7e919a 5025 else:
5026 metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
b60419c5 5027
301d07fc 5028 # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
5029 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
5030 def _get_uncropped(url):
5031 return url_or_none((url or '').split('=')[0] + '=s0')
5032
6141346d 5033 avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
301d07fc 5034 if avatar_thumbnails:
5035 uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
5036 if uncropped_avatar:
5037 avatar_thumbnails.append({
5038 'url': uncropped_avatar,
5039 'id': 'avatar_uncropped',
5040 'preference': 1
5041 })
5042
5043 channel_banners = self._extract_thumbnails(
bd7e919a 5044 data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
301d07fc 5045 for banner in channel_banners:
5046 banner['preference'] = -10
5047
5048 if channel_banners:
5049 uncropped_banner = _get_uncropped(channel_banners[0]['url'])
5050 if uncropped_banner:
5051 channel_banners.append({
5052 'url': uncropped_banner,
5053 'id': 'banner_uncropped',
5054 'preference': -5
5055 })
5056
bd7e919a 5057 # Deprecated - remove primary_sidebar_renderer when layout discontinued
5058 primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
5059 playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
5060
301d07fc 5061 primary_thumbnails = self._extract_thumbnails(
a17526e4 5062 primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
6141346d
M
5063 playlist_thumbnails = self._extract_thumbnails(
5064 playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
5065
bd7e919a 5066 info.update({
5067 'title': (traverse_obj(metadata_renderer, 'title')
5068 or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
5069 or info['id']),
5070 'availability': self._extract_availability(data),
5071 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
5072 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
5073 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
5074 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
5075 })
f0d785d3 5076
7666b936 5077 channel_handle = (
5078 traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
5079 or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
5080
5081 if channel_handle:
5082 info.update({
5083 'uploader_id': channel_handle,
5084 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
5085 })
8213ce28 5086
5087 channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
5088 if self._has_badge(channel_badges, BadgeType.VERIFIED):
5089 info['channel_is_verified'] = True
6141346d
M
5090 # Playlist stats is a text runs array containing [video count, view count, last updated].
5091 # last updated or (view count and last updated) may be missing.
5092 playlist_stats = get_first(
bd7e919a 5093 (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
5094
6141346d
M
5095 last_updated_unix = self._parse_time_text(
5096 self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
5097 or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
ad54c913 5098 info['modified_date'] = strftime_or_none(last_updated_unix)
6141346d 5099
bd7e919a 5100 info['view_count'] = self._get_count(playlist_stats, 1)
5101 if info['view_count'] is None: # 0 is allowed
5102 info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
31e18355 5103 if info['view_count'] is None:
5104 info['view_count'] = self._get_count(data, (
5105 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
5106 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
bd7e919a 5107
5108 info['playlist_count'] = self._get_count(playlist_stats, 0)
5109 if info['playlist_count'] is None: # 0 is allowed
5110 info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
5111
7666b936 5112 if not info.get('channel_id'):
6141346d 5113 owner = traverse_obj(playlist_header_renderer, 'ownerText')
bd7e919a 5114 if not owner: # Deprecated
6141346d
M
5115 owner = traverse_obj(
5116 self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
5117 ('videoOwner', 'videoOwnerRenderer', 'title'))
5118 owner_text = self._get_text(owner)
5119 browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
bd7e919a 5120 info.update({
7666b936 5121 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
5122 'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
5123 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
bd7e919a 5124 })
6141346d 5125
bd7e919a 5126 info.update({
7666b936 5127 'uploader': info['channel'],
5128 'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
5129 'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
bd7e919a 5130 })
7666b936 5131
bd7e919a 5132 return info
73c4ac2c 5133
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch-page panel) playlist, page by page.

    Each page is turned into entries with self._playlist_entries(); entries
    up to and including the last one already yielded are skipped. The
    following page is requested from the 'next' endpoint.

    @param playlist:    playlist panel renderer of the current page
    @param playlist_id: id sent as 'playlistId' and used in progress notes
    @param data:        initial response, used for account sync id / visitor data
    @param ytcfg:       ytcfg forwarded to header generation and the API call
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last panel item may not carry a watchEndpoint; fall back to an
        # empty dict so the defaults below apply instead of raising on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents')
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No follow-up playlist panel in the response: nothing more to page through
            return
cd7c66cf 5164
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist panel found in a response.

    Returns a url_result pointing at the playlist's own web URL when one is
    present, differs from `url` and the id is not a known-unviewable one;
    otherwise returns a playlist_result built from the inline entries.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
c5e8d7af 5187
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public', or whatever self._availability() derives from the
              private / unlisted / subscription / premium signals
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _from_privacy_fields(header_value, icon_value, fallback=None):
        # Prefer the header privacy field, then the selected dropdown icon,
        # then the supplied fallback (None means "unknown")
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # A conditional expression binds looser than `or`, so the badge test has to be
    # combined with the field lookup explicitly; otherwise a badge is dropped
    # whenever the header privacy field is missing.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _from_privacy_fields('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _from_privacy_fields('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 5228
5229 @staticmethod
5230 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
5231 sidebar_renderer = try_get(
5232 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
5233 for item in sidebar_renderer:
5234 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
5235 if renderer:
5236 return renderer
5237
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without any request when `data` has neither playlist
    metadata nor a playlist header; otherwise returns the non-fatal result
    of the API call made for browse id 'VL<item_id>'.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not bool(playlist_marker):
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)

    query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}',
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5257
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' configuration argument read with YoutubeTabIE's key."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5261
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download `url` and pull the initial data out of it, driven by self.RetryManager.

    @returns: (webpage, data); either may be None if no attempt got that far
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network failures are retried, except HTTP 403/429 which are reported right away
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if retryable:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
5289
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is truthy or the extractor is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in
    the 'skip' configuration argument, and emits a one-time warning if not.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if not authcheck_skipped and fatal:
        hint = (
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
        raise ExtractorError(hint, expected=True)
    self.report_warning(msg, only_once=True)
5301
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data for `url`, from the webpage unless it is skipped,
    falling back to the tab endpoint when the webpage gave no data.

    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5320
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through 'navigation/resolve_url' and fetch the endpoint it maps to.

    A resolved browseEndpoint is fetched from 'browse', a watchEndpoint from
    'next' (checked in that order). When neither is present an
    ExtractorError is raised if `fatal`, otherwise a warning is reported and
    None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5338
# Search 'params' value used by _search_results() when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, requesting the 'search' endpoint page by page.

    @param query:          search text sent as 'query'
    @param params:         'params' value for the request; NO_DEFAULT selects self._SEARCH_PARAMS
    @param default_client: client name forwarded to ytcfg download, headers and the API call
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Top-level keys of the paths above, de-duplicated
    check_get_keys = tuple({path[0] for path in content_keys})
    display_id = f'query "{query}"'
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-slot holder handed to _extract_entries(); its first item is merged into
    # the next request and, when falsy after a page, ends the paging
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            return
5373
5374
5375class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5376 IE_DESC = 'YouTube Tabs'
5377 _VALID_URL = r'''(?x:
5378 https?://
b032ff0f 5379 (?!consent\.)(?:\w+\.)?
a6213a49 5380 (?:
5381 youtube(?:kids)?\.com|
5382 %(invidious)s
5383 )/
5384 (?:
5385 (?P<channel_type>channel|c|user|browse)/|
5386 (?P<not_channel>
5387 feed/|hashtag/|
5388 (?:playlist|watch)\?.*?\blist=
5389 )|
5390 (?!(?:%(reserved_names)s)\b) # Direct URLs
5391 )
5392 (?P<id>[^/?\#&]+)
5393 )''' % {
5394 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5395 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5396 }
5397 IE_NAME = 'youtube:tab'
5398
5399 _TESTS = [{
5400 'note': 'playlists, multipage',
5401 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5402 'playlist_mincount': 94,
5403 'info_dict': {
5404 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5405 'title': 'Igor Kleiner - Playlists',
a6213a49 5406 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5407 'uploader': 'Igor Kleiner',
7666b936 5408 'uploader_id': '@IgorDataScience',
5409 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
976ae3ea 5410 'channel': 'Igor Kleiner',
5411 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5412 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5413 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5414 'channel_follower_count': int
a6213a49 5415 },
5416 }, {
5417 'note': 'playlists, multipage, different order',
5418 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5419 'playlist_mincount': 94,
5420 'info_dict': {
5421 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5422 'title': 'Igor Kleiner - Playlists',
a6213a49 5423 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5424 'uploader': 'Igor Kleiner',
7666b936 5425 'uploader_id': '@IgorDataScience',
5426 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
976ae3ea 5427 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5428 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5429 'channel': 'Igor Kleiner',
5430 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5431 'channel_follower_count': int
a6213a49 5432 },
5433 }, {
5434 'note': 'playlists, series',
5435 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5436 'playlist_mincount': 5,
5437 'info_dict': {
5438 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5439 'title': '3Blue1Brown - Playlists',
5440 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
976ae3ea 5441 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5442 'channel': '3Blue1Brown',
5443 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5444 'uploader_id': '@3blue1brown',
5445 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5446 'uploader': '3Blue1Brown',
976ae3ea 5447 'tags': ['Mathematics'],
14a14335 5448 'channel_follower_count': int,
8213ce28 5449 'channel_is_verified': True,
a6213a49 5450 },
5451 }, {
5452 'note': 'playlists, singlepage',
5453 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5454 'playlist_mincount': 4,
5455 'info_dict': {
5456 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5457 'title': 'ThirstForScience - Playlists',
5458 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5459 'uploader': 'ThirstForScience',
7666b936 5460 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5461 'uploader_id': '@ThirstForScience',
976ae3ea 5462 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
7666b936 5463 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5464 'tags': 'count:13',
5465 'channel': 'ThirstForScience',
6c73052c 5466 'channel_follower_count': int
a6213a49 5467 }
5468 }, {
5469 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5470 'only_matching': True,
5471 }, {
5472 'note': 'basic, single video playlist',
5473 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5474 'info_dict': {
a6213a49 5475 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5476 'title': 'youtube-dl public playlist',
976ae3ea 5477 'description': '',
5478 'tags': [],
5479 'view_count': int,
5480 'modified_date': '20201130',
5481 'channel': 'Sergey M.',
5482 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
976ae3ea 5483 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5484 'availability': 'public',
7666b936 5485 'uploader': 'Sergey M.',
5486 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5487 'uploader_id': '@sergeym.6173',
a6213a49 5488 },
5489 'playlist_count': 1,
5490 }, {
5491 'note': 'empty playlist',
5492 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5493 'info_dict': {
a6213a49 5494 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5495 'title': 'youtube-dl empty playlist',
976ae3ea 5496 'tags': [],
5497 'channel': 'Sergey M.',
5498 'description': '',
5499 'modified_date': '20160902',
5500 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5501 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5502 'availability': 'public',
7666b936 5503 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5504 'uploader_id': '@sergeym.6173',
5505 'uploader': 'Sergey M.',
a6213a49 5506 },
5507 'playlist_count': 0,
5508 }, {
5509 'note': 'Home tab',
5510 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5511 'info_dict': {
5512 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5513 'title': 'lex will - Home',
5514 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5515 'uploader': 'lex will',
7666b936 5516 'uploader_id': '@lexwill718',
976ae3ea 5517 'channel': 'lex will',
5518 'tags': ['bible', 'history', 'prophesy'],
7666b936 5519 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5520 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5521 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5522 'channel_follower_count': int
a6213a49 5523 },
5524 'playlist_mincount': 2,
5525 }, {
5526 'note': 'Videos tab',
5527 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5528 'info_dict': {
5529 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5530 'title': 'lex will - Videos',
5531 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5532 'uploader': 'lex will',
7666b936 5533 'uploader_id': '@lexwill718',
976ae3ea 5534 'tags': ['bible', 'history', 'prophesy'],
5535 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5536 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5537 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5538 'channel': 'lex will',
6c73052c 5539 'channel_follower_count': int
a6213a49 5540 },
5541 'playlist_mincount': 975,
5542 }, {
5543 'note': 'Videos tab, sorted by popular',
5544 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5545 'info_dict': {
5546 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5547 'title': 'lex will - Videos',
5548 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5549 'uploader': 'lex will',
7666b936 5550 'uploader_id': '@lexwill718',
976ae3ea 5551 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
7666b936 5552 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5553 'channel': 'lex will',
5554 'tags': ['bible', 'history', 'prophesy'],
5555 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5556 'channel_follower_count': int
a6213a49 5557 },
5558 'playlist_mincount': 199,
5559 }, {
5560 'note': 'Playlists tab',
5561 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5562 'info_dict': {
5563 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5564 'title': 'lex will - Playlists',
5565 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5566 'uploader': 'lex will',
7666b936 5567 'uploader_id': '@lexwill718',
5568 'uploader_url': 'https://www.youtube.com/@lexwill718',
976ae3ea 5569 'channel': 'lex will',
5570 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5571 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5572 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5573 'channel_follower_count': int
a6213a49 5574 },
5575 'playlist_mincount': 17,
5576 }, {
5577 'note': 'Community tab',
5578 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5579 'info_dict': {
5580 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5581 'title': 'lex will - Community',
5582 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5583 'channel': 'lex will',
5584 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5585 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5586 'tags': ['bible', 'history', 'prophesy'],
7666b936 5587 'channel_follower_count': int,
5588 'uploader_url': 'https://www.youtube.com/@lexwill718',
5589 'uploader_id': '@lexwill718',
5590 'uploader': 'lex will',
a6213a49 5591 },
5592 'playlist_mincount': 18,
5593 }, {
5594 'note': 'Channels tab',
5595 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5596 'info_dict': {
5597 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5598 'title': 'lex will - Channels',
5599 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
976ae3ea 5600 'channel': 'lex will',
5601 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5602 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5603 'tags': ['bible', 'history', 'prophesy'],
7666b936 5604 'channel_follower_count': int,
5605 'uploader_url': 'https://www.youtube.com/@lexwill718',
5606 'uploader_id': '@lexwill718',
5607 'uploader': 'lex will',
a6213a49 5608 },
5609 'playlist_mincount': 12,
5610 }, {
5611 'note': 'Search tab',
5612 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5613 'playlist_mincount': 40,
5614 'info_dict': {
5615 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5616 'title': '3Blue1Brown - Search - linear algebra',
5617 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
976ae3ea 5618 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5619 'tags': ['Mathematics'],
5620 'channel': '3Blue1Brown',
5621 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
7666b936 5622 'channel_follower_count': int,
5623 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5624 'uploader_id': '@3blue1brown',
5625 'uploader': '3Blue1Brown',
8213ce28 5626 'channel_is_verified': True,
a6213a49 5627 },
5628 }, {
5629 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5630 'only_matching': True,
5631 }, {
5632 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5633 'only_matching': True,
5634 }, {
5635 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5636 'only_matching': True,
5637 }, {
5638 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5639 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5640 'info_dict': {
5641 'title': '29C3: Not my department',
5642 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
a6213a49 5643 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5644 'tags': [],
976ae3ea 5645 'view_count': int,
5646 'modified_date': '20150605',
5647 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
7666b936 5648 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
976ae3ea 5649 'channel': 'Christiaan008',
c26f9b99 5650 'availability': 'public',
7666b936 5651 'uploader_id': '@ChRiStIaAn008',
5652 'uploader': 'Christiaan008',
5653 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
a6213a49 5654 },
5655 'playlist_count': 96,
5656 }, {
5657 'note': 'Large playlist',
5658 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5659 'info_dict': {
5660 'title': 'Uploads from Cauchemar',
5661 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
7666b936 5662 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5663 'tags': [],
5664 'modified_date': r're:\d{8}',
5665 'channel': 'Cauchemar',
976ae3ea 5666 'view_count': int,
5667 'description': '',
5668 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5669 'availability': 'public',
7666b936 5670 'uploader_id': '@Cauchemar89',
5671 'uploader': 'Cauchemar',
5672 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
a6213a49 5673 },
5674 'playlist_mincount': 1123,
976ae3ea 5675 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5676 }, {
5677 'note': 'even larger playlist, 8832 videos',
5678 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5679 'only_matching': True,
5680 }, {
5681 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5682 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5683 'info_dict': {
5684 'title': 'Uploads from Interstellar Movie',
5685 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5686 'tags': [],
5687 'view_count': int,
5688 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
7666b936 5689 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5690 'channel': 'Interstellar Movie',
5691 'description': '',
5692 'modified_date': r're:\d{8}',
c26f9b99 5693 'availability': 'public',
7666b936 5694 'uploader_id': '@InterstellarMovie',
5695 'uploader': 'Interstellar Movie',
5696 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
a6213a49 5697 },
5698 'playlist_mincount': 21,
5699 }, {
5700 'note': 'Playlist with "show unavailable videos" button',
5701 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5702 'info_dict': {
5703 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5704 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5705 'view_count': int,
5706 'channel': 'Phim Siêu Nhân Nhật Bản',
5707 'tags': [],
976ae3ea 5708 'description': '',
5709 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5710 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5711 'modified_date': r're:\d{8}',
c26f9b99 5712 'availability': 'public',
7666b936 5713 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5714 'uploader_id': '@phimsieunhannhatban',
5715 'uploader': 'Phim Siêu Nhân Nhật Bản',
a6213a49 5716 },
5717 'playlist_mincount': 200,
976ae3ea 5718 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5719 }, {
5720 'note': 'Playlist with unavailable videos in page 7',
5721 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5722 'info_dict': {
5723 'title': 'Uploads from BlankTV',
5724 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5725 'channel': 'BlankTV',
7666b936 5726 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5727 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5728 'view_count': int,
5729 'tags': [],
976ae3ea 5730 'modified_date': r're:\d{8}',
5731 'description': '',
c26f9b99 5732 'availability': 'public',
7666b936 5733 'uploader_id': '@blanktv',
5734 'uploader': 'BlankTV',
5735 'uploader_url': 'https://www.youtube.com/@blanktv',
a6213a49 5736 },
5737 'playlist_mincount': 1000,
976ae3ea 5738 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5739 }, {
5740 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5741 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5742 'info_dict': {
5743 'title': 'Data Analysis with Dr Mike Pound',
5744 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
a6213a49 5745 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5746 'tags': [],
5747 'view_count': int,
5748 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
7666b936 5749 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
976ae3ea 5750 'channel': 'Computerphile',
c26f9b99 5751 'availability': 'public',
6141346d 5752 'modified_date': '20190712',
7666b936 5753 'uploader_id': '@Computerphile',
5754 'uploader': 'Computerphile',
5755 'uploader_url': 'https://www.youtube.com/@Computerphile',
a6213a49 5756 },
5757 'playlist_mincount': 11,
5758 }, {
5759 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5760 'only_matching': True,
5761 }, {
5762 'note': 'Playlist URL that does not actually serve a playlist',
5763 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5764 'info_dict': {
5765 'id': 'FqZTN594JQw',
5766 'ext': 'webm',
5767 'title': "Smiley's People 01 detective, Adventure Series, Action",
a6213a49 5768 'upload_date': '20150526',
5769 'license': 'Standard YouTube License',
5770 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5771 'categories': ['People & Blogs'],
5772 'tags': list,
5773 'view_count': int,
5774 'like_count': int,
a6213a49 5775 },
5776 'params': {
5777 'skip_download': True,
5778 },
5779 'skip': 'This video is not available.',
5780 'add_ie': [YoutubeIE.ie_key()],
5781 }, {
5782 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5783 'only_matching': True,
5784 }, {
5785 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5786 'only_matching': True,
5787 }, {
5788 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5789 'info_dict': {
14a14335 5790 'id': 'hGkQjiJLjWQ', # This will keep changing
a6213a49 5791 'ext': 'mp4',
976ae3ea 5792 'title': str,
a6213a49 5793 'upload_date': r're:\d{8}',
976ae3ea 5794 'description': str,
a6213a49 5795 'categories': ['News & Politics'],
5796 'tags': list,
5797 'like_count': int,
86973308 5798 'release_timestamp': int,
976ae3ea 5799 'channel': 'Sky News',
5800 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5801 'age_limit': 0,
5802 'view_count': int,
86973308 5803 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5804 'playable_in_embed': True,
86973308 5805 'release_date': r're:\d+',
976ae3ea 5806 'availability': 'public',
5807 'live_status': 'is_live',
5808 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5809 'channel_follower_count': int,
5810 'concurrent_view_count': int,
7666b936 5811 'uploader_url': 'https://www.youtube.com/@SkyNews',
5812 'uploader_id': '@SkyNews',
5813 'uploader': 'Sky News',
8213ce28 5814 'channel_is_verified': True,
a6213a49 5815 },
5816 'params': {
5817 'skip_download': True,
5818 },
976ae3ea 5819 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5820 }, {
5821 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5822 'info_dict': {
5823 'id': 'a48o2S1cPoo',
5824 'ext': 'mp4',
5825 'title': 'The Young Turks - Live Main Show',
a6213a49 5826 'upload_date': '20150715',
5827 'license': 'Standard YouTube License',
5828 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5829 'categories': ['News & Politics'],
5830 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5831 'like_count': int,
a6213a49 5832 },
5833 'params': {
5834 'skip_download': True,
5835 },
5836 'only_matching': True,
5837 }, {
5838 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5839 'only_matching': True,
5840 }, {
5841 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5842 'only_matching': True,
5843 }, {
5844 'note': 'A channel that is not live. Should raise error',
5845 'url': 'https://www.youtube.com/user/numberphile/live',
5846 'only_matching': True,
5847 }, {
5848 'url': 'https://www.youtube.com/feed/trending',
5849 'only_matching': True,
5850 }, {
5851 'url': 'https://www.youtube.com/feed/library',
5852 'only_matching': True,
5853 }, {
5854 'url': 'https://www.youtube.com/feed/history',
5855 'only_matching': True,
5856 }, {
5857 'url': 'https://www.youtube.com/feed/subscriptions',
5858 'only_matching': True,
5859 }, {
5860 'url': 'https://www.youtube.com/feed/watch_later',
5861 'only_matching': True,
5862 }, {
5863 'note': 'Recommended - redirects to home page.',
5864 'url': 'https://www.youtube.com/feed/recommended',
5865 'only_matching': True,
5866 }, {
5867 'note': 'inline playlist with not always working continuations',
5868 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5869 'only_matching': True,
5870 }, {
5871 'url': 'https://www.youtube.com/course',
5872 'only_matching': True,
5873 }, {
5874 'url': 'https://www.youtube.com/zsecurity',
5875 'only_matching': True,
5876 }, {
5877 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5878 'only_matching': True,
5879 }, {
5880 'url': 'https://www.youtube.com/TheYoungTurks/live',
5881 'only_matching': True,
5882 }, {
5883 'url': 'https://www.youtube.com/hashtag/cctv9',
5884 'info_dict': {
5885 'id': 'cctv9',
5886 'title': '#cctv9',
976ae3ea 5887 'tags': [],
a6213a49 5888 },
4dc23a80 5889 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5890 }, {
5891 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5892 'only_matching': True,
5893 }, {
5894 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5895 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5896 'only_matching': True
5897 }, {
5898 'note': '/browse/ should redirect to /channel/',
5899 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5900 'only_matching': True
5901 }, {
5902 'note': 'VLPL, should redirect to playlist?list=PL...',
5903 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5904 'info_dict': {
5905 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
a6213a49 5906 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
12a1b225 5907 'title': 'NCS : All Releases 💿',
7666b936 5908 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
976ae3ea 5909 'modified_date': r're:\d{8}',
5910 'view_count': int,
5911 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5912 'tags': [],
5913 'channel': 'NoCopyrightSounds',
c26f9b99 5914 'availability': 'public',
7666b936 5915 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
5916 'uploader': 'NoCopyrightSounds',
5917 'uploader_id': '@NoCopyrightSounds',
a6213a49 5918 },
5919 'playlist_mincount': 166,
7666b936 5920 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
a6213a49 5921 }, {
7666b936 5922 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
a6213a49 5923 'note': 'Topic, should redirect to playlist?list=UU...',
5924 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5925 'info_dict': {
5926 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5927 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 5928 'tags': [],
5929 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5930 'channel': 'Royalty Free Music - Topic',
5931 'view_count': int,
5932 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
976ae3ea 5933 'modified_date': r're:\d{8}',
976ae3ea 5934 'description': '',
c26f9b99 5935 'availability': 'public',
7666b936 5936 'uploader': 'Royalty Free Music - Topic',
a6213a49 5937 },
a6213a49 5938 'playlist_mincount': 101,
7666b936 5939 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5940 }, {
86973308
M
5941 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5942 # Treat as a general feed
a6213a49 5943 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5944 'info_dict': {
5945 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5946 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5947 'tags': [],
a6213a49 5948 },
a6213a49 5949 'playlist_mincount': 9,
5950 }, {
5951 'note': 'Youtube music Album',
5952 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5953 'info_dict': {
5954 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5955 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5956 'tags': [],
5957 'view_count': int,
5958 'description': '',
5959 'availability': 'unlisted',
5960 'modified_date': r're:\d{8}',
a6213a49 5961 },
5962 'playlist_count': 50,
7666b936 5963 'expected_warnings': ['YouTube Music is not directly supported'],
a6213a49 5964 }, {
5965 'note': 'unlisted single video playlist',
5966 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5967 'info_dict': {
a6213a49 5968 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5969 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5970 'availability': 'unlisted',
5971 'tags': [],
12a1b225 5972 'modified_date': '20220418',
976ae3ea 5973 'channel': 'colethedj',
5974 'view_count': int,
5975 'description': '',
976ae3ea 5976 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5977 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
7666b936 5978 'uploader_url': 'https://www.youtube.com/@colethedj1894',
5979 'uploader_id': '@colethedj1894',
5980 'uploader': 'colethedj',
a6213a49 5981 },
93e12ed7 5982 'playlist': [{
5983 'info_dict': {
5984 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
5985 'id': 'BaW_jenozKc',
5986 '_type': 'url',
5987 'ie_key': 'Youtube',
5988 'duration': 10,
5989 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
5990 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
5991 'view_count': int,
5992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
5993 'channel': 'Philipp Hagemeister',
5994 'uploader_id': '@PhilippHagemeister',
5995 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
5996 'uploader': 'Philipp Hagemeister',
5997 }
5998 }],
a6213a49 5999 'playlist_count': 1,
93e12ed7 6000 'params': {'extract_flat': True},
a6213a49 6001 }, {
6002 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6003 'url': 'https://www.youtube.com/feed/recommended',
6004 'info_dict': {
6005 'id': 'recommended',
6006 'title': 'recommended',
6c73052c 6007 'tags': [],
a6213a49 6008 },
6009 'playlist_mincount': 50,
6010 'params': {
6011 'skip_download': True,
6012 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6013 },
6014 }, {
6015 'note': 'API Fallback: /videos tab, sorted by oldest first',
6016 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6017 'info_dict': {
6018 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6019 'title': 'Cody\'sLab - Videos',
6020 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
976ae3ea 6021 'channel': 'Cody\'sLab',
6022 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6023 'tags': [],
6024 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 6025 'channel_follower_count': int
a6213a49 6026 },
6027 'playlist_mincount': 650,
6028 'params': {
6029 'skip_download': True,
6030 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6031 },
86973308 6032 'skip': 'Query for sorting no longer works',
a6213a49 6033 }, {
6034 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6035 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6036 'info_dict': {
6037 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 6038 'title': 'Uploads from Royalty Free Music - Topic',
976ae3ea 6039 'modified_date': r're:\d{8}',
6040 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6041 'description': '',
6042 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6043 'tags': [],
6044 'channel': 'Royalty Free Music - Topic',
6045 'view_count': int,
c26f9b99 6046 'availability': 'public',
7666b936 6047 'uploader': 'Royalty Free Music - Topic',
a6213a49 6048 },
a6213a49 6049 'playlist_mincount': 101,
6050 'params': {
6051 'skip_download': True,
6052 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
6053 },
7666b936 6054 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
7c219ea6 6055 }, {
6056 'note': 'non-standard redirect to regional channel',
6057 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6058 'only_matching': True
61d3665d 6059 }, {
6060 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6061 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6062 'info_dict': {
6063 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6064 'modified_date': '20220407',
6065 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6066 'tags': [],
61d3665d 6067 'availability': 'unlisted',
6068 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6069 'channel': 'pukkandan',
6070 'description': 'Test for collaborative playlist',
6071 'title': 'yt-dlp test - collaborative playlist',
12a1b225 6072 'view_count': int,
7666b936 6073 'uploader_url': 'https://www.youtube.com/@pukkandan',
6074 'uploader_id': '@pukkandan',
6075 'uploader': 'pukkandan',
61d3665d 6076 },
6077 'playlist_mincount': 2
c26f9b99 6078 }, {
6079 'note': 'translated tab name',
6080 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6081 'info_dict': {
6082 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6083 'tags': [],
c26f9b99 6084 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 6085 'description': 'test description',
c26f9b99 6086 'title': 'cole-dlp-test-acc - 再生リスト',
c26f9b99 6087 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6088 'channel': 'cole-dlp-test-acc',
7666b936 6089 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6090 'uploader_id': '@coletdjnz',
6091 'uploader': 'cole-dlp-test-acc',
c26f9b99 6092 },
6093 'playlist_mincount': 1,
6094 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6095 'expected_warnings': ['Preferring "ja"'],
6096 }, {
6097 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6098 'note': 'preferred lang set with playlist with translated video titles',
6099 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6100 'info_dict': {
6101 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6102 'tags': [],
6103 'view_count': int,
6104 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
c26f9b99 6105 'channel': 'cole-dlp-test-acc',
6106 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6107 'description': 'test',
c26f9b99 6108 'title': 'dlp test playlist',
6109 'availability': 'public',
7666b936 6110 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6111 'uploader_id': '@coletdjnz',
6112 'uploader': 'cole-dlp-test-acc',
c26f9b99 6113 },
6114 'playlist_mincount': 1,
6115 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6116 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 6117 }, {
6118 # shorts audio pivot for 2GtVksBMYFM.
6119 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6120 'info_dict': {
6121 'id': 'sfv_audio_pivot',
6122 'title': 'sfv_audio_pivot',
6123 'tags': [],
6124 },
6125 'playlist_mincount': 50,
6126
86973308
M
6127 }, {
6128 # Channel with a real live tab (not to be mistaken with streams tab)
6129 # Do not treat like it should redirect to live stream
6130 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6131 'info_dict': {
6132 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6133 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6134 'tags': [],
6135 },
6136 'playlist_mincount': 20,
6137 }, {
6138 # Tab name is not the same as tab id
6139 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6140 'info_dict': {
6141 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6142 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6143 'tags': [],
6144 },
6145 'playlist_mincount': 8,
6146 }, {
6147 # Home tab id is literally home. Not to get mistaken with featured
6148 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6149 'info_dict': {
6150 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6151 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6152 'tags': [],
6153 },
6154 'playlist_mincount': 8,
6155 }, {
6156 # Should get three playlists for videos, shorts and streams tabs
6157 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6158 'info_dict': {
6159 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 6160 'title': 'Polka Ch. 尾丸ポルカ',
6161 'channel_follower_count': int,
6162 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6163 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
7666b936 6164 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
bd7e919a 6165 'channel': 'Polka Ch. 尾丸ポルカ',
6166 'tags': 'count:35',
7666b936 6167 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6168 'uploader': 'Polka Ch. 尾丸ポルカ',
6169 'uploader_id': '@OmaruPolka',
86973308
M
6170 },
6171 'playlist_count': 3,
6172 }, {
6173 # Shorts tab with channel with handle
7666b936 6174 # TODO: fix channel description
86973308
M
6175 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6176 'info_dict': {
6177 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6178 'title': 'Not Just Bikes - Shorts',
6179 'tags': 'count:12',
86973308 6180 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
7666b936 6181 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
86973308 6182 'channel_follower_count': int,
86973308 6183 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
86973308 6184 'channel': 'Not Just Bikes',
7666b936 6185 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6186 'uploader': 'Not Just Bikes',
6187 'uploader_id': '@NotJustBikes',
86973308
M
6188 },
6189 'playlist_mincount': 10,
6190 }, {
6191 # Streams tab
6192 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6193 'info_dict': {
6194 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6195 'title': '中村悠一 - Live',
6196 'tags': 'count:7',
6197 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6198 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
86973308 6199 'channel': '中村悠一',
86973308 6200 'channel_follower_count': int,
86973308 6201 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
7666b936 6202 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6203 'uploader_id': '@Yuichi-Nakamura',
6204 'uploader': '中村悠一',
86973308
M
6205 },
6206 'playlist_mincount': 60,
6207 }, {
6208 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6209 # See test_youtube_lists
6210 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6211 'only_matching': True,
6212 }, {
6213 # No uploads and no UCID given. Should fail with no uploads error
6214 # See test_youtube_lists
6215 'url': 'https://www.youtube.com/news',
6216 'only_matching': True
6217 }, {
6218 # No videos tab but has a shorts tab
6219 'url': 'https://www.youtube.com/c/TKFShorts',
6220 'info_dict': {
6221 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6222 'title': 'Shorts Break - Shorts',
7666b936 6223 'tags': 'count:48',
86973308
M
6224 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6225 'channel': 'Shorts Break',
7666b936 6226 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
86973308 6227 'channel_follower_count': int,
86973308 6228 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
7666b936 6229 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6230 'uploader': 'Shorts Break',
6231 'uploader_id': '@ShortsBreak_Official',
86973308
M
6232 },
6233 'playlist_mincount': 30,
6234 }, {
6235 # Trending Now Tab. tab id is empty
6236 'url': 'https://www.youtube.com/feed/trending',
6237 'info_dict': {
6238 'id': 'trending',
6239 'title': 'trending - Now',
6240 'tags': [],
6241 },
6242 'playlist_mincount': 30,
6243 }, {
6244 # Trending Gaming Tab. tab id is empty
6245 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6246 'info_dict': {
6247 'id': 'trending',
6248 'title': 'trending - Gaming',
6249 'tags': [],
6250 },
6251 'playlist_mincount': 30,
4dc23a80
M
6252 }, {
6253 # Shorts url result in shorts tab
7666b936 6254 # TODO: Fix channel id extraction
4dc23a80
M
6255 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6256 'info_dict': {
6257 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6258 'title': 'cole-dlp-test-acc - Shorts',
4dc23a80 6259 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6260 'description': 'test description',
6261 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6262 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6263 'tags': [],
7666b936 6264 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6265 'uploader_id': '@coletdjnz',
4dc23a80 6266 'uploader': 'cole-dlp-test-acc',
4dc23a80
M
6267 },
6268 'playlist': [{
6269 'info_dict': {
7666b936 6270 # Channel data is not currently available for short renderers (as of 2023-03-01)
4dc23a80
M
6271 '_type': 'url',
6272 'ie_key': 'Youtube',
6273 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6274 'id': 'sSM9J5YH_60',
4dc23a80 6275 'title': 'SHORT short',
4dc23a80
M
6276 'view_count': int,
6277 'thumbnails': list,
6278 }
6279 }],
6280 'params': {'extract_flat': True},
6281 }, {
6282 # Live video status should be extracted
6283 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6284 'info_dict': {
6285 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6286 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6287 'tags': []
6288 },
6289 'playlist': [{
6290 'info_dict': {
6291 '_type': 'url',
6292 'ie_key': 'Youtube',
6293 'url': 'startswith:https://www.youtube.com/watch?v=',
6294 'id': str,
6295 'title': str,
6296 'live_status': 'is_live',
6297 'channel_id': str,
6298 'channel_url': str,
6299 'concurrent_view_count': int,
6300 'channel': str,
93e12ed7 6301 'uploader': str,
6302 'uploader_url': str,
14a14335 6303 'uploader_id': str,
8213ce28 6304 'channel_is_verified': bool, # this will keep changing
4dc23a80
M
6305 }
6306 }],
c7335551 6307 'params': {'extract_flat': True, 'playlist_items': '1'},
4dc23a80 6308 'playlist_mincount': 1
c7335551
M
6309 }, {
6310 # Channel renderer metadata. Contains number of videos on the channel
6311 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6312 'info_dict': {
6313 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6314 'title': 'cole-dlp-test-acc - Channels',
c7335551
M
6315 'channel': 'cole-dlp-test-acc',
6316 'description': 'test description',
6317 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6318 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6319 'tags': [],
7666b936 6320 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6321 'uploader_id': '@coletdjnz',
c7335551 6322 'uploader': 'cole-dlp-test-acc',
c7335551
M
6323 },
6324 'playlist': [{
6325 'info_dict': {
6326 '_type': 'url',
6327 'ie_key': 'YoutubeTab',
6328 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6329 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6330 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6331 'title': 'PewDiePie',
6332 'channel': 'PewDiePie',
6333 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6334 'thumbnails': list,
6335 'channel_follower_count': int,
7666b936 6336 'playlist_count': int,
6337 'uploader': 'PewDiePie',
6338 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6339 'uploader_id': '@PewDiePie',
8213ce28 6340 'channel_is_verified': True,
c7335551
M
6341 }
6342 }],
6343 'params': {'extract_flat': True},
31e18355 6344 }, {
6345 'url': 'https://www.youtube.com/@3blue1brown/about',
6346 'info_dict': {
6347 'id': 'UCYO_jab_esuFRV4b17AJtAw',
6348 'tags': ['Mathematics'],
6349 'title': '3Blue1Brown - About',
31e18355 6350 'channel_follower_count': int,
6351 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
31e18355 6352 'channel': '3Blue1Brown',
31e18355 6353 'view_count': int,
6354 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6355 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
7666b936 6356 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6357 'uploader_id': '@3blue1brown',
6358 'uploader': '3Blue1Brown',
8213ce28 6359 'channel_is_verified': True,
31e18355 6360 },
6361 'playlist_count': 0,
447afb9e 6362 }, {
6363 # Podcasts tab, with rich entry playlistRenderers
6364 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6365 'info_dict': {
6366 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6367 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6368 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6369 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6370 'title': '99 Percent Invisible - Podcasts',
6371 'uploader': '99 Percent Invisible',
6372 'channel_follower_count': int,
6373 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6374 'tags': [],
6375 'channel': '99 Percent Invisible',
6376 'uploader_id': '@99percentinvisiblepodcast',
6377 },
6378 'playlist_count': 1,
6379 }, {
6380 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6381 'url': 'https://www.youtube.com/@AHimitsu/releases',
6382 'info_dict': {
6383 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6384 'channel': 'A Himitsu',
6385 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6386 'title': 'A Himitsu - Releases',
6387 'uploader_id': '@AHimitsu',
6388 'uploader': 'A Himitsu',
6389 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6390 'tags': 'count:16',
6391 'description': 'I make music',
6392 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6393 'channel_follower_count': int,
8213ce28 6394 'channel_is_verified': True,
447afb9e 6395 },
6396 'playlist_mincount': 10,
fcbc9ed7 6397 }, {
6398 # Playlist with only shorts, shown as reel renderers
6399 # FIXME: future: YouTube currently doesn't give continuation for this,
6400 # may do in future.
6401 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6402 'info_dict': {
6403 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6404 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6405 'view_count': int,
6406 'uploader_id': '@BangyShorts',
6407 'description': '',
6408 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6409 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6410 'channel': 'Bangy Shorts',
6411 'uploader': 'Bangy Shorts',
6412 'tags': [],
6413 'availability': 'public',
6414 'modified_date': '20230626',
6415 'title': 'Uploads from Bangy Shorts',
6416 },
6417 'playlist_mincount': 100,
6418 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 6419 }]
6420
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; every
    other URL is decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6424
86973308
M
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<segment>" directly after it; the conditional group
#          only tries to match it when the group named "not_channel" did not
#          participate in the match (that group is not defined in this
#          pattern itself - presumably it comes from _VALID_URL)
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/[^?#/]+))?'
    r'(?P<post>.*)$')
6426
6427 def _get_url_mobj(self, url):
6428 mobj = self._URL_RE.match(url).groupdict()
6429 mobj.update((k, '') for k, v in mobj.items() if v is None)
6430 return mobj
6431
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Work out a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of *tab* (empty string when
    there is no title). ``tab_id`` is taken, in order of preference, from:

    1. the "tab" path segment of the tab's endpoint URL (resolved against
       *base_url*), without its leading slash;
    2. the ``tabIdentifier`` string of *tab*;
    3. the tab name itself, as a last resort.
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    # Prefer the tab segment of the URL; an empty segment counts as missing
    identifier = self._get_url_mobj(tab_url)['tab'][1:] if tab_url else None
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if identifier:
        # Some identifiers are reported under a different public tab id
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(identifier, identifier), name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')

    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(name, name), name
6453
6454 def _has_tab(self, tabs, tab_id):
6455 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 6456
182bda88 6457 @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
6458 def _real_extract(self, url, smuggled_data):
cd7c66cf 6459 item_id = self._match_id(url)
14f25df2 6460 url = urllib.parse.urlunparse(
6461 urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
a06916d9 6462 compat_opts = self.get_param('compat_opts', [])
cd7c66cf 6463
86973308
M
6464 mobj = self._get_url_mobj(url)
6465 pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
bd7e919a 6466 if is_channel and smuggled_data.get('is_music_url'):
6467 if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
6468 return self.url_result(
6469 f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
6470 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
6471 mdata = self._extract_tab_endpoint(
6472 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
6473 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
6474 get_all=False, expected_type=str)
6475 if not murl:
6476 raise ExtractorError('Failed to resolve album to playlist')
6477 return self.url_result(murl, YoutubeTabIE)
6478 elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
6479 return self.url_result(
6480 f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)
6481
6482 original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
fe03a6cd 6483 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
bd7e919a 6484 url = f'{pre}/videos{post}'
5b28cef7 6485 if smuggled_data.get('is_music_url'):
6486 self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')
cd7c66cf 6487
6488 # Handle both video/playlist URLs
201c1459 6489 qs = parse_qs(url)
bd7e919a 6490 video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
fe03a6cd 6491 if not video_id and mobj['not_channel'].startswith('watch'):
cd7c66cf 6492 if not playlist_id:
fe03a6cd 6493 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
bd7e919a 6494 raise ExtractorError('A video URL was given without video ID', expected=True)
fe03a6cd 6495 # Common mistake: https://www.youtube.com/watch?list=playlist_id
37e57a9f 6496 self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
bd7e919a 6497 return self.url_result(
6498 f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)
cd7c66cf 6499
86973308
M
6500 if not self._yes_playlist(playlist_id, video_id):
6501 return self.url_result(
6502 f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
cd7c66cf 6503
bd7e919a 6504 data, ytcfg = self._extract_data(url, display_id)
14fdfea9 6505
7c219ea6 6506 # YouTube may provide a non-standard redirect to the regional channel
6507 # See: https://github.com/yt-dlp/yt-dlp/issues/2694
86973308 6508 # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
7c219ea6 6509 redirect_url = traverse_obj(
6510 data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
6511 if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
bd7e919a 6512 redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
86973308
M
6513 self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
6514 return self.url_result(redirect_url, YoutubeTabIE)
7c219ea6 6515
bd7e919a 6516 tabs, extra_tabs = self._extract_tab_renderers(data), []
86973308 6517 if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
18db7548 6518 selected_tab = self._extract_selected_tab(tabs)
86973308
M
6519 selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
6520 self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
6521
6522 if not original_tab_id and selected_tab_name:
bd7e919a 6523 self.to_screen('Downloading all uploads of the channel. '
86973308
M
6524 'To download only the videos in a specific tab, pass the tab\'s URL')
6525 if self._has_tab(tabs, 'streams'):
bd7e919a 6526 extra_tabs.append(''.join((pre, '/streams', post)))
86973308 6527 if self._has_tab(tabs, 'shorts'):
bd7e919a 6528 extra_tabs.append(''.join((pre, '/shorts', post)))
86973308
M
6529 # XXX: Members-only tab should also be extracted
6530
bd7e919a 6531 if not extra_tabs and selected_tab_id != 'videos':
86973308
M
6532 # Channel does not have streams, shorts or videos tabs
6533 if item_id[:2] != 'UC':
6534 raise ExtractorError('This channel has no uploads', expected=True)
6535
6536 # Topic channels don't have /videos. Use the equivalent playlist instead
6537 pl_id = f'UU{item_id[2:]}'
6538 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
6539 try:
6540 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
6541 except ExtractorError:
6542 raise ExtractorError('This channel has no uploads', expected=True)
64f36541 6543 else:
86973308
M
6544 item_id, url = pl_id, pl_url
6545 self.to_screen(
6546 f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
6547
bd7e919a 6548 elif extra_tabs and selected_tab_id != 'videos':
86973308 6549 # When there are shorts/live tabs but not videos tab
bd7e919a 6550 url, data = f'{pre}{post}', None
86973308
M
6551
6552 elif (original_tab_id or 'videos') != selected_tab_id:
6553 if original_tab_id == 'live':
6554 # Live tab should have redirected to the video
6555 # Except in the case the channel has an actual live tab
6556 # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
bd7e919a 6557 raise UserNotLive(video_id=item_id)
86973308
M
6558 elif selected_tab_name:
6559 raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
6560
6561 # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
6562 url = f'{pre}{post}'
18db7548 6563
358de58c 6564 # YouTube sometimes provides a button to reload playlist with unavailable videos.
53ed7066 6565 if 'no-youtube-unavailable-videos' not in compat_opts:
bd7e919a 6566 data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
c0ac49bc 6567 self._extract_and_report_alerts(data, only_once=True)
86973308 6568
bd7e919a 6569 tabs, entries = self._extract_tab_renderers(data), []
8bdd16b4 6570 if tabs:
bd7e919a 6571 entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
6572 entries[0].update({
86973308
M
6573 'extractor_key': YoutubeTabIE.ie_key(),
6574 'extractor': YoutubeTabIE.IE_NAME,
6575 'webpage_url': url,
6576 })
bd7e919a 6577 if self.get_param('playlist_items') == '0':
6578 entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
6579 else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
6580 entries.extend(map(self._real_extract, extra_tabs))
6581
6582 if len(entries) == 1:
6583 return entries[0]
6584 elif entries:
6585 metadata = self._extract_metadata_from_tabs(item_id, data)
6586 uploads_url = 'the Uploads (UU) playlist URL'
6587 if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
6588 uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
6589 self.to_screen(
6590 'Downloading as multiple playlists, separated by tabs. '
6591 f'To download as a single playlist instead, pass {uploads_url}')
6592 return self.playlist_result(entries, item_id, **metadata)
6593
6594 # Inline playlist
37e57a9f 6595 playlist = traverse_obj(
6596 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
8bdd16b4 6597 if playlist:
ac56cf38 6598 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
cd7c66cf 6599
37e57a9f 6600 video_id = traverse_obj(
6601 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
8bdd16b4 6602 if video_id:
bd7e919a 6603 if tab != '/live': # live tab is expected to redirect to video
37e57a9f 6604 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
86973308 6605 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
cd7c66cf 6606
8bdd16b4 6607 raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6608
c5e8d7af 6609
class YoutubePlaylistIE(InfoExtractor):
    """Resolve playlist IDs / playlist-carrying URLs to the canonical playlist page.

    The extractor does no downloading of its own: it rebuilds a
    ``https://www.youtube.com/playlist`` URL and delegates to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that YoutubeTabIE accepts, and URLs carrying a non-empty ``v``
        query parameter, are rejected; everything else is decided by the
        inherited ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the import is deliberately local in the original;
        # presumably so this method stays self-contained when its source is
        # copied elsewhere - confirm before hoisting it to module level.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using a rebuilt playlist page URL."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare playlist
        # ID has none, so fall back to a minimal ``list=<id>`` query.
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6722
6723
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist ID.

    The short link is expanded into a full ``watch`` URL containing both the
    video ID and the playlist ID, which is then handed to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link to a watch URL and delegate to YoutubeTabIE."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6773
6774
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the channel's ``/live`` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6788
6789
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a ``/user/<name>`` URL."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6802
b05654f0 6803
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor that redirects ``:ytfav`` (and spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6821
6822
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Keyword extractor (``:ytnotif``) yielding one URL entry per notification.

    Notifications are fetched page by page from the
    ``notification/get_notification_menu`` endpoint; each notification becomes
    either a video URL entry or a community-post URL entry.
    """
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one response page and record its continuation.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` found among the items, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            # Recorded only after the entry (if any) was consumed
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result for a single notification renderer.

        Returns None when the notification points neither to a video nor to a
        community post with a recognisable channel ID and post ID.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The relative "sent" time is only parsed when explicitly requested
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries across all pages, following continuations."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on page 1)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` built lazily from the menu pages."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6913
6914
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key.

    Purely declarative: all behaviour comes from the two base classes.
    """
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    # Prefix users type in front of the query, e.g. "ytsearch5:<query>"
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 6928
a61fd4cf 6929
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` search key.

    Purely declarative: all behaviour comes from the two base classes.
    """
    # Derived from the plain search extractor's name with a ":date" suffix
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Prefix users type in front of the query, e.g. "ytsearchdate5:<query>"
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 6943
c9ae7b95 6944
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from a ``youtube.com/results`` (or ``/search``) URL.

    The query is read from ``search_query`` or ``q``; an optional ``sp``
    parameter is forwarded to the search as its params.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both ID and title."""
        qs = parse_qs(url)
        # _VALID_URL requires one of the two query keys to be present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 7014
7015
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from a ``music.youtube.com/search`` URL.

    A result section can be selected either through the ``sp`` query
    parameter or through the URL fragment (e.g. ``#songs``), which is looked
    up in ``_SECTIONS``.
    """
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower-case, as written in the URL fragment) -> search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit params: name the section if known, else show the raw params
            section = params
            for name, encoded in self._SECTIONS.items():
                if encoded == params:
                    section = name
                    break
        else:
            # No params: try to derive them from the URL fragment
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
16aa9ea4 7067
7068
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.

    _FEED_NAME is used both for the extractor name (``youtube:<name>``) and
    for the feed URL (``/feed/<name>``) that is handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its
        # login check is invoked explicitly on this instance.
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
7087
7088
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor that redirects ``:ytwatchlater`` to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 7101
7102
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com home page URL as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 7118
1ed5b5c9 7119
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword and variants)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 7131
1ed5b5c9 7132
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
7141
7142
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs to the ``sfv_audio_pivot`` feed URL."""
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video id is substituted three times into a fixed binary
        template, which is then base64-encoded and URL-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7165
7166
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter and explain why.

    The pattern only matches URLs that end right after ``watch?`` plus at most
    one of a few known non-video parameters (or after an ``attribution_link``
    with a single parameter) - typically the result of an unquoted ``&`` on
    the command line. Extraction always fails with a hint for the user.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint.

        The example commands name ``yt-dlp`` (this program) rather than
        ``youtube-dl``, so the suggestion can be copied as-is.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
7214
7215
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extract a YouTube clip as a time section of its base video.

    The clip page is used to find the base video ID and the clip's start and
    end times; the actual extraction is delegated to YoutubeIE through a
    ``url_transparent`` result.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        Raises ExtractorError when either the base video ID or the clip's
        timing data cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a missing loopCommand would surface as a bare
        # TypeError from subscripting None below.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; sections are in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 7275
7276
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follow ``consent.youtube.com`` redirect pages to their ``continue`` target."""
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a URL result for the ``continue`` parameter of the consent URL.

        Raises an expected ExtractorError when the parameter is absent or is
        not accepted by ``url_or_none``.
        """
        # When the parameter is repeated, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7322
7323
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is 1 to 10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)
772fd5cc 7338 expected=True)