]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/youtube] Fix bug in b7c47b743871cdf3e0de75b17e4454d987384bf9
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
1890fc63 18from ..compat import functools # isort: split
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
f8271158 31 NO_DEFAULT,
32 ExtractorError,
720c3099 33 bug_reports_message,
82d02080 34 classproperty,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
ff91cf74 41 get_first,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
48416bc4 45 js_to_json,
94278f72 46 mimetype2ext,
9c0d7f49 47 network_exceptions,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
f0d785d3 65 unified_timestamp,
cf7e015f 66 unsmuggle_url,
8bdd16b4 67 update_url_query,
21c340b8 68 url_or_none,
fe93e2c4 69 urljoin,
7c365c21 70 variadic,
c5e8d7af
PH
71)
72
# Table of known Innertube clients, keyed by the client id used throughout this module.
# Each entry is a ytcfg-like dict. Keys omitted here (INNERTUBE_API_KEY, INNERTUBE_HOST,
# REQUIRE_JS_PLAYER, the context's 'hl') are filled in with defaults by
# build_innertube_clients(), which also assigns a 'priority' to every entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        # The only entry with an explicit host; all others get the default host
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default key
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default key
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
231
232
e7870111
D
233def _split_innertube_client(client_name):
234 variant, *base = client_name.rsplit('.', 1)
235 if base:
236 return variant, base[0], variant
237 base, *variant = client_name.split('_', 1)
238 return client_name, base, variant[0] if variant else None
239
240
def build_innertube_clients():
    """
    Complete every INNERTUBE_CLIENTS entry in place.

    Fills in the default API key, host, REQUIRE_JS_PLAYER flag and 'hl',
    assigns a 'priority' derived from the base client, and registers an
    extra '<client>_embedscreen' copy for every client without a variant.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])

    # Iterate over a snapshot: '<client>_embedscreen' entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in (
                ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
                ('INNERTUBE_HOST', 'www.youtube.com'),
                ('REQUIRE_JS_PLAYER', True)):
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain client: derive an embed-screen flavour from a deep copy
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg


build_innertube_clients()
270
271
de7f3446 272class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 273 """Provide base functions for Youtube extractors"""
e00eb564 274
3462ffa8 275 _RESERVED_NAMES = (
3cd786db 276 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 277 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
3619f78d 278 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 279 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 280
3619f78d 281 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
282
52efa4b3 283 # _NETRC_MACHINE = 'youtube'
3619f78d 284
b2e8bc1b
JMF
285 # If True it will raise an error if no login info is provided
286 _LOGIN_REQUIRED = False
287
d9190e44
RH
288 _INVIDIOUS_SITES = (
289 # invidious-redirect websites
290 r'(?:www\.)?redirect\.invidious\.io',
291 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 292 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
293 r'(?:www\.)?invidious\.pussthecat\.org',
294 r'(?:www\.)?invidious\.zee\.li',
295 r'(?:www\.)?invidious\.ethibox\.fr',
296 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
297 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
298 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
299 # youtube-dl invidious instances list
300 r'(?:(?:www|no)\.)?invidiou\.sh',
301 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
302 r'(?:www\.)?invidious\.kabi\.tk',
303 r'(?:www\.)?invidious\.mastodon\.host',
304 r'(?:www\.)?invidious\.zapashcanon\.fr',
305 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
306 r'(?:www\.)?invidious\.tinfoil-hat\.net',
307 r'(?:www\.)?invidious\.himiko\.cloud',
308 r'(?:www\.)?invidious\.reallyancient\.tech',
309 r'(?:www\.)?invidious\.tube',
310 r'(?:www\.)?invidiou\.site',
311 r'(?:www\.)?invidious\.site',
312 r'(?:www\.)?invidious\.xyz',
313 r'(?:www\.)?invidious\.nixnet\.xyz',
314 r'(?:www\.)?invidious\.048596\.xyz',
315 r'(?:www\.)?invidious\.drycat\.fr',
316 r'(?:www\.)?inv\.skyn3t\.in',
317 r'(?:www\.)?tube\.poal\.co',
318 r'(?:www\.)?tube\.connect\.cafe',
319 r'(?:www\.)?vid\.wxzm\.sx',
320 r'(?:www\.)?vid\.mint\.lgbt',
321 r'(?:www\.)?vid\.puffyan\.us',
322 r'(?:www\.)?yewtu\.be',
323 r'(?:www\.)?yt\.elukerio\.org',
324 r'(?:www\.)?yt\.lelux\.fi',
325 r'(?:www\.)?invidious\.ggc-project\.de',
326 r'(?:www\.)?yt\.maisputain\.ovh',
327 r'(?:www\.)?ytprivate\.com',
328 r'(?:www\.)?invidious\.13ad\.de',
329 r'(?:www\.)?invidious\.toot\.koeln',
330 r'(?:www\.)?invidious\.fdn\.fr',
331 r'(?:www\.)?watch\.nettohikari\.com',
332 r'(?:www\.)?invidious\.namazso\.eu',
333 r'(?:www\.)?invidious\.silkky\.cloud',
334 r'(?:www\.)?invidious\.exonip\.de',
335 r'(?:www\.)?invidious\.riverside\.rocks',
336 r'(?:www\.)?invidious\.blamefran\.net',
337 r'(?:www\.)?invidious\.moomoo\.de',
338 r'(?:www\.)?ytb\.trom\.tf',
339 r'(?:www\.)?yt\.cyberhost\.uk',
340 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
341 r'(?:www\.)?qklhadlycap4cnod\.onion',
342 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
343 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
344 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
345 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
346 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
347 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
348 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
349 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
350 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
351 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
352 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
353 r'(?:www\.)?piped\.kavin\.rocks',
354 r'(?:www\.)?piped\.silkky\.cloud',
355 r'(?:www\.)?piped\.tokhmi\.xyz',
356 r'(?:www\.)?piped\.moomoo\.me',
357 r'(?:www\.)?il\.ax',
358 r'(?:www\.)?piped\.syncpundit\.com',
359 r'(?:www\.)?piped\.mha\.fi',
360 r'(?:www\.)?piped\.mint\.lgbt',
361 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
362 )
363
def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie on .youtube.com.

    Does nothing when a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. The id of a 'PENDING+<id>'
    cookie is reused when present; otherwise a random 3-digit id is used.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return
    consent_cookie = cookies.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)
    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 378
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is 'en' and 'tz' is 'UTC'.

    Existing PREF values are kept when the cookie can be parsed; a
    warning is reported when parsing raises ValueError.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(compat_urlparse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
390
def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, CONSENT cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
395
def _check_login_required(self):
    """Raise a login-required error when _LOGIN_REQUIRED is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 399
b7c47b74 400 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
401 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 402
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
109dd3b2 405
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client`."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 408
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default ytcfg of
    `default_client` when the first lookup yields nothing truthy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
413
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, else from the default ytcfg of `default_client`."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 418
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, else from the default ytcfg of `default_client`."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 423
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, else from the default ytcfg of `default_client`."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
426
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict-valued INNERTUBE_CONTEXT found in `ytcfg` or in the
    default ytcfg of `default_client`, with its 'client' dict (when present)
    updated in place to English / UTC.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
434
cf87314d 435 _SAPISID = None
436
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is read from the cookies on first use and stored on
    self._SAPISID (False when no usable cookie exists).
    @returns the header value, or None when no SAPISID is available
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
461
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` to the Innertube endpoint `ep` and return the decoded JSON.

    @param ep              endpoint name appended to '/youtubei/v1/'
    @param query           dict merged into the request body next to 'context'
    @param headers         extra headers; override the generated API headers
    @param context         Innertube context; defaults to the one of `default_client`
    @param api_key         API key; defaults to the key of `default_client`
    @param api_hostname    host to call; defaults to the host of `default_client`
    @param default_client  client whose defaults fill in anything not given
    Remaining arguments are passed to _download_json unchanged.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={
            # Fall back to the key of the SAME client that supplies context, headers and host
            'key': api_key or self._extract_api_key(default_client=default_client),
            'prettyPrint': 'false',
        })
f4f751af 477
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 480
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    @returns the first integer SESSION_INDEX among the given ytcfgs, else None
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
491
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return ID_TOKEN from `ytcfg` if present, else search `webpage` for it; None when not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
502
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found in the arguments, else None
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
a1c5d2ca 521
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_paths, expected_type=str)
ac56cf38 531
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
535
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`; {} when missing or unparsable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
543
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an Innertube API request.

    Explicit arguments win; anything not given is looked up in `ytcfg`
    (client name/version fall back to `default_client`). Headers whose
    value ends up None are left out of the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    headers = {
        'X-YouTube-Client-Name': compat_str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 568
def _download_ytcfg(self, client, video_id):
    """
    Download the page for `client` and return the ytcfg found in it.
    Only 'web', 'web_music' and 'web_embedded' have a page; {} otherwise.
    """
    config_urls = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_urls.get(client)
    if not url:
        return {}
    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
580
2d6659b9 581 @staticmethod
582 def _build_api_continuation_query(continuation, ctp=None):
583 query = {
584 'continuation': continuation
585 }
586 # TODO: Inconsistency with clickTrackingParams.
587 # Currently we have a fixed ctp contained within context (from ytcfg)
588 # and a ctp in root query for continuation.
589 if ctp:
590 query['clickTracking'] = {'clickTrackingParams': ctp}
591 return query
592
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when neither carries a token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = next_continuation.get('continuation') if next_continuation else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, next_continuation.get('clickTrackingParams'))
2d6659b9 605
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when no continuationCommand token exists.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 615
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find a continuation query for `renderer`.

    The renderer's own continuation data is tried first, then the
    continuationItemRenderer of each entry under 'contents' and 'items'.
    Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x: x[key], list) or [])]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
636
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert listed under data['alerts'].

    Entries that are not dicts, renderers that are not dicts, and alerts
    without a type or without text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed renderer must not abort extraction with AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
649
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.

    With `fatal`, alerts of type 'error' (case-insensitive) are errors: the
    last one is raised as ExtractorError, every other alert is reported as a
    warning. Without `fatal`, all alerts are warnings.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        (errors if is_error else warnings).append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
663
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
666
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels under renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
674
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths.

    For each candidate object 'simpleText' is preferred; otherwise the
    'text' of its 'runs' (or of the object itself when it is a list) are
    joined, limited to the first `max_runs` runs when given.
    With no paths, `data` itself is examined. Returns None when nothing matches.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path yields a single object, not a list of results
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 696
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.
    parse_count is tried first; failing that, the leading run of digits and
    commas (whitespace removed) is converted with str_to_int.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(digits)
704
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results
728
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    @returns result of datetime_from_str, or None when nothing matches or conversion fails
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None
    start, amount, unit = mobj.group('start', 'time', 'unit')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
744
def _extract_time_text(self, renderer, *path_list):
    """
    Read a time text from `renderer` and convert it to a timestamp.
    Relative times are tried first, then absolute dates; a warning is
    reported when a non-empty text cannot be parsed.
    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
763
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the Innertube API with retries and return the JSON response.

    Up to 'extractor_retries' (default 3) additional attempts are made on
    network errors (except HTTP 403/429), on "unknown error" alerts inside
    a 200 response, and when none of `check_get_keys` is present in the
    response.

    @param check_get_keys  keys of which at least one must be in the response
    @param fatal           raise on failure when True; otherwise report a
                           warning and return None
    @returns the response dict, or None on non-fatal failure
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Surface the error message of a JSON error body, if there is one
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; otherwise fall through so the
                # error is raised/reported instead of returning the failed response
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
835
@staticmethod
def is_music_url(url):
    """Tell whether `url` begins with an http(s)://music.youtube.com/ address."""
    music_prefix = r'https?://music\.youtube\.com/'
    return bool(re.match(music_prefix, url))
839
def _extract_video(self, renderer):
    """Turn a video renderer dict into a `url`-type result handled by YoutubeIE.

    Only metadata present in the renderer is filled in; the returned URL is
    the /shorts/ form when the renderer marks the video as a short, and the
    /watch form otherwise.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # No usable length text: try to pull the duration out of the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    web_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', web_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    # The date derived from the listing's time text is only exposed on explicit request
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    # First matching condition wins: scheduled > already streamed > currently live
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
894
0c148415 895
360e1ca5 896class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 897 IE_DESC = 'YouTube'
cb7dfeea 898 _VALID_URL = r"""(?x)^
c5e8d7af 899 (
edb53e2d 900 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 901 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
902 (?:www\.)?deturl\.com/www\.youtube\.com|
903 (?:www\.)?pwnyoutube\.com|
904 (?:www\.)?hooktube\.com|
905 (?:www\.)?yourepeat\.com|
906 tube\.majestyc\.net|
907 %(invidious)s|
908 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
909 (?:.*?\#/)? # handle anchor (#/) redirect urls
910 (?: # the various things that can precede the ID:
b6ce9bb0 911 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 912 |(?: # or the v= param in all its forms
f7000f3a 913 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 914 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 915 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
916 v=
917 )
f4b05232 918 ))
cbaed4bb
S
919 |(?:
920 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
921 vid\.plus| # or vid.plus/xxxx
922 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 923 %(invidious)s
cbaed4bb 924 )/
edb53e2d 925 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 926 )
c5e8d7af 927 )? # all until now is optional -> you can pass the naked ID
201c1459 928 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 929 (?(1).+)? # if we found the ID, everything can follow
9297939e 930 (?:\#|$)""" % {
d9190e44 931 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 932 }
e40c758c 933 _PLAYER_INFO_RE = (
cc2db878 934 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
935 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 936 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 937 )
2c62dc26 938 _formats = {
c2d3cb4c 939 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
940 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
941 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
942 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
943 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
944 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
946 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 947 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 948 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
949 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
950 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
952 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
953 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 954 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 955 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
956 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 957
958
959 # 3D videos
c2d3cb4c 960 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
961 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
962 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
963 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 964 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
965 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
966 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 967
96fb5605 968 # Apple HTTP Live Streaming
11f12195 969 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 970 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
972 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
973 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
974 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 975 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
976 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
977
978 # DASH mp4 video
d23028a8
S
979 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 984 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
985 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
989 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
990 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 991
f6f1fc92 992 # Dash mp4 audio
d23028a8
S
993 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
994 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
995 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
996 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
997 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
998 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
999 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1000
1001 # Dash webm
d23028a8
S
1002 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1006 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1007 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1008 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1009 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1015 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1017 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1018 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1020 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1021 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1022 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1024
1025 # Dash webm audio
d23028a8
S
1026 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1027 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1028
0857baad 1029 # Dash webm audio with opus inside
d23028a8
S
1030 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1031 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1032 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1033
ce6b9a2d
PH
1034 # RTMP (unnamed)
1035 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1036
1037 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1038 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1039 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1040 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1041 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1042 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1043 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1044 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1045 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1046 }
29f7c58a 1047 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1048
fd5c4aab
S
1049 _GEO_BYPASS = False
1050
78caa52a 1051 IE_NAME = 'youtube'
2eb88d95
PH
1052 _TESTS = [
1053 {
2d3d2997 1054 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1055 'info_dict': {
1056 'id': 'BaW_jenozKc',
1057 'ext': 'mp4',
3867038a 1058 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1059 'uploader': 'Philipp Hagemeister',
1060 'uploader_id': 'phihag',
ec85ded8 1061 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1062 'channel': 'Philipp Hagemeister',
dd4c4492
S
1063 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1064 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1065 'upload_date': '20121002',
ff9f925b 1066 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1067 'categories': ['Science & Technology'],
3867038a 1068 'tags': ['youtube-dl'],
556dbe7f 1069 'duration': 10,
dbdaaa23 1070 'view_count': int,
3e7c1224 1071 'like_count': int,
ff9f925b 1072 'availability': 'public',
1073 'playable_in_embed': True,
1074 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1075 'live_status': 'not_live',
1076 'age_limit': 0,
7c80519c 1077 'start_time': 1,
297a564b 1078 'end_time': 9,
6c73052c 1079 'channel_follower_count': int
2eb88d95 1080 }
0e853ca4 1081 },
fccd3771 1082 {
4bc3a23e
PH
1083 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1084 'note': 'Embed-only video (#1746)',
1085 'info_dict': {
1086 'id': 'yZIXLfi8CZQ',
1087 'ext': 'mp4',
1088 'upload_date': '20120608',
1089 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1090 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1091 'uploader': 'SET India',
94bfcd23 1092 'uploader_id': 'setindia',
ec85ded8 1093 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1094 'age_limit': 18,
545cc85d 1095 },
1096 'skip': 'Private video',
fccd3771 1097 },
11b56058 1098 {
8bdd16b4 1099 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1100 'note': 'Use the first video ID in the URL',
1101 'info_dict': {
1102 'id': 'BaW_jenozKc',
1103 'ext': 'mp4',
3867038a 1104 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1105 'uploader': 'Philipp Hagemeister',
1106 'uploader_id': 'phihag',
ec85ded8 1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1108 'channel': 'Philipp Hagemeister',
1109 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1110 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1111 'upload_date': '20121002',
976ae3ea 1112 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1113 'categories': ['Science & Technology'],
3867038a 1114 'tags': ['youtube-dl'],
556dbe7f 1115 'duration': 10,
dbdaaa23 1116 'view_count': int,
11b56058 1117 'like_count': int,
976ae3ea 1118 'availability': 'public',
1119 'playable_in_embed': True,
1120 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1121 'live_status': 'not_live',
1122 'age_limit': 0,
6c73052c 1123 'channel_follower_count': int
34a7de29
S
1124 },
1125 'params': {
1126 'skip_download': True,
1127 },
11b56058 1128 },
dd27fd17 1129 {
2d3d2997 1130 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1131 'note': '256k DASH audio (format 141) via DASH manifest',
1132 'info_dict': {
1133 'id': 'a9LDPn-MO4I',
1134 'ext': 'm4a',
1135 'upload_date': '20121002',
1136 'uploader_id': '8KVIDEO',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1138 'description': '',
1139 'uploader': '8KVIDEO',
1140 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1141 },
4bc3a23e
PH
1142 'params': {
1143 'youtube_include_dash_manifest': True,
1144 'format': '141',
4919603f 1145 },
de3c7fe0 1146 'skip': 'format 141 not served anymore',
dd27fd17 1147 },
8bdd16b4 1148 # DASH manifest with encrypted signature
1149 {
1150 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1151 'info_dict': {
1152 'id': 'IB3lcPjvWLA',
1153 'ext': 'm4a',
1154 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1155 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1156 'duration': 244,
1157 'uploader': 'AfrojackVEVO',
1158 'uploader_id': 'AfrojackVEVO',
1159 'upload_date': '20131011',
cc2db878 1160 'abr': 129.495,
976ae3ea 1161 'like_count': int,
1162 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1163 'playable_in_embed': True,
1164 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1165 'view_count': int,
1166 'track': 'The Spark',
1167 'live_status': 'not_live',
1168 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1169 'channel': 'Afrojack',
1170 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1171 'tags': 'count:19',
1172 'availability': 'public',
1173 'categories': ['Music'],
1174 'age_limit': 0,
1175 'alt_title': 'The Spark',
6c73052c 1176 'channel_follower_count': int
8bdd16b4 1177 },
1178 'params': {
1179 'youtube_include_dash_manifest': True,
1180 'format': '141/bestaudio[ext=m4a]',
1181 },
1182 },
65c2fde2 1183 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1184 {
65c2fde2 1185 'note': 'Embed allowed age-gate video',
2d3d2997 1186 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1187 'info_dict': {
1188 'id': 'HtVdAasjOgU',
1189 'ext': 'mp4',
1190 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1191 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1192 'duration': 142,
c522adb1
JMF
1193 'uploader': 'The Witcher',
1194 'uploader_id': 'WitcherGame',
ec85ded8 1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1196 'upload_date': '20140605',
34952f09 1197 'age_limit': 18,
976ae3ea 1198 'categories': ['Gaming'],
1199 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1200 'availability': 'needs_auth',
1201 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1202 'like_count': int,
1203 'channel': 'The Witcher',
1204 'live_status': 'not_live',
1205 'tags': 'count:17',
1206 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1207 'playable_in_embed': True,
1208 'view_count': int,
6c73052c 1209 'channel_follower_count': int
c522adb1
JMF
1210 },
1211 },
65c2fde2 1212 {
1213 'note': 'Age-gate video with embed allowed in public site',
1214 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1215 'info_dict': {
1216 'id': 'HsUATh_Nc2U',
1217 'ext': 'mp4',
1218 'title': 'Godzilla 2 (Official Video)',
1219 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1220 'upload_date': '20200408',
1221 'uploader_id': 'FlyingKitty900',
1222 'uploader': 'FlyingKitty',
1223 'age_limit': 18,
976ae3ea 1224 'availability': 'needs_auth',
1225 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1226 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1227 'channel': 'FlyingKitty',
1228 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1229 'view_count': int,
1230 'categories': ['Entertainment'],
1231 'live_status': 'not_live',
1232 'tags': ['Flyingkitty', 'godzilla 2'],
1233 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1234 'like_count': int,
1235 'duration': 177,
1236 'playable_in_embed': True,
6c73052c 1237 'channel_follower_count': int
65c2fde2 1238 },
1239 },
1240 {
1241 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1242 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1243 'info_dict': {
1244 'id': 'Tq92D6wQ1mg',
1245 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1246 'ext': 'mp4',
17322130 1247 'upload_date': '20191228',
65c2fde2 1248 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1249 'uploader': 'Projekt Melody',
1250 'description': 'md5:17eccca93a786d51bc67646756894066',
1251 'age_limit': 18,
976ae3ea 1252 'like_count': int,
1253 'availability': 'needs_auth',
1254 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1255 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1256 'view_count': int,
1257 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1258 'channel': 'Projekt Melody',
1259 'live_status': 'not_live',
1260 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1261 'playable_in_embed': True,
1262 'categories': ['Entertainment'],
1263 'duration': 106,
1264 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
6c73052c 1265 'channel_follower_count': int
65c2fde2 1266 },
1267 },
1268 {
1269 'note': 'Non-Agegated non-embeddable video',
1270 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1271 'info_dict': {
1272 'id': 'MeJVWBSsPAY',
1273 'ext': 'mp4',
1274 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1275 'uploader': 'Herr Lurik',
1276 'uploader_id': 'st3in234',
1277 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1278 'upload_date': '20130730',
976ae3ea 1279 'track': 'Such mich find mich',
1280 'age_limit': 0,
1281 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1282 'like_count': int,
1283 'playable_in_embed': False,
1284 'creator': 'OOMPH!',
1285 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1286 'view_count': int,
1287 'alt_title': 'Such mich find mich',
1288 'duration': 210,
1289 'channel': 'Herr Lurik',
1290 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1291 'categories': ['Music'],
1292 'availability': 'public',
1293 'uploader_url': 'http://www.youtube.com/user/st3in234',
1294 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1295 'live_status': 'not_live',
1296 'artist': 'OOMPH!',
6c73052c 1297 'channel_follower_count': int
65c2fde2 1298 },
1299 },
1300 {
1301 'note': 'Non-bypassable age-gated video',
1302 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1303 'only_matching': True,
1304 },
8bdd16b4 1305 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1306 # YouTube Red ad is not captured for creator
1307 {
1308 'url': '__2ABJjxzNo',
1309 'info_dict': {
1310 'id': '__2ABJjxzNo',
1311 'ext': 'mp4',
1312 'duration': 266,
1313 'upload_date': '20100430',
1314 'uploader_id': 'deadmau5',
1315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1316 'creator': 'deadmau5',
1317 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1318 'uploader': 'deadmau5',
1319 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1320 'alt_title': 'Some Chords',
976ae3ea 1321 'availability': 'public',
1322 'tags': 'count:14',
1323 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1324 'view_count': int,
1325 'live_status': 'not_live',
1326 'channel': 'deadmau5',
1327 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1328 'like_count': int,
1329 'track': 'Some Chords',
1330 'artist': 'deadmau5',
1331 'playable_in_embed': True,
1332 'age_limit': 0,
1333 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1334 'categories': ['Music'],
1335 'album': 'Some Chords',
6c73052c 1336 'channel_follower_count': int
8bdd16b4 1337 },
1338 'expected_warnings': [
1339 'DASH manifest missing',
1340 ]
1341 },
067aa17e 1342 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1343 {
1344 'url': 'lqQg6PlCWgI',
1345 'info_dict': {
1346 'id': 'lqQg6PlCWgI',
1347 'ext': 'mp4',
556dbe7f 1348 'duration': 6085,
90227264 1349 'upload_date': '20150827',
cbe2bd91 1350 'uploader_id': 'olympic',
ec85ded8 1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1352 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1353 'uploader': 'Olympics',
cbe2bd91 1354 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1355 'like_count': int,
1356 'release_timestamp': 1343767800,
1357 'playable_in_embed': True,
1358 'categories': ['Sports'],
1359 'release_date': '20120731',
1360 'channel': 'Olympics',
1361 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1362 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1363 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1364 'age_limit': 0,
1365 'availability': 'public',
1366 'live_status': 'was_live',
1367 'view_count': int,
1368 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1369 'channel_follower_count': int
cbe2bd91
PH
1370 },
1371 'params': {
1372 'skip_download': 'requires avconv',
e52a40ab 1373 }
cbe2bd91 1374 },
6271f1ca
PH
1375 # Non-square pixels
1376 {
1377 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1378 'info_dict': {
1379 'id': '_b-2C3KPAM0',
1380 'ext': 'mp4',
1381 'stretched_ratio': 16 / 9.,
556dbe7f 1382 'duration': 85,
6271f1ca
PH
1383 'upload_date': '20110310',
1384 'uploader_id': 'AllenMeow',
ec85ded8 1385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1386 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1387 'uploader': '孫ᄋᄅ',
6271f1ca 1388 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1389 'playable_in_embed': True,
1390 'channel': '孫ᄋᄅ',
1391 'age_limit': 0,
1392 'tags': 'count:11',
1393 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1394 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1395 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1396 'view_count': int,
1397 'categories': ['People & Blogs'],
1398 'like_count': int,
1399 'live_status': 'not_live',
1400 'availability': 'unlisted',
6c73052c 1401 'channel_follower_count': int
6271f1ca 1402 },
06b491eb
S
1403 },
1404 # url_encoded_fmt_stream_map is empty string
1405 {
1406 'url': 'qEJwOuvDf7I',
1407 'info_dict': {
1408 'id': 'qEJwOuvDf7I',
f57b7835 1409 'ext': 'webm',
06b491eb
S
1410 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1411 'description': '',
1412 'upload_date': '20150404',
1413 'uploader_id': 'spbelect',
1414 'uploader': 'Наблюдатели Петербурга',
1415 },
1416 'params': {
1417 'skip_download': 'requires avconv',
e323cf3f
S
1418 },
1419 'skip': 'This live event has ended.',
06b491eb 1420 },
067aa17e 1421 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1422 {
1423 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1424 'info_dict': {
1425 'id': 'FIl7x6_3R5Y',
eb6793ba 1426 'ext': 'webm',
da77d856
S
1427 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1428 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1429 'duration': 220,
da77d856
S
1430 'upload_date': '20150625',
1431 'uploader_id': 'dorappi2000',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1433 'uploader': 'dorappi2000',
eb6793ba 1434 'formats': 'mincount:31',
da77d856 1435 },
eb6793ba 1436 'skip': 'not actual anymore',
2ee8f5d8 1437 },
8a1a26ce
YCH
1438 # DASH manifest with segment_list
1439 {
1440 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1441 'md5': '8ce563a1d667b599d21064e982ab9e31',
1442 'info_dict': {
1443 'id': 'CsmdDsKjzN8',
1444 'ext': 'mp4',
17ee98e1 1445 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1446 'uploader': 'Airtek',
1447 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1448 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1449 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1450 },
1451 'params': {
1452 'youtube_include_dash_manifest': True,
1453 'format': '135', # bestvideo
be49068d
S
1454 },
1455 'skip': 'This live event has ended.',
2ee8f5d8 1456 },
cf7e015f
S
1457 {
1458 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1459 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1460 'info_dict': {
545cc85d 1461 'id': 'jvGDaLqkpTg',
1462 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1463 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1464 },
1465 'playlist': [{
1466 'info_dict': {
545cc85d 1467 'id': 'jvGDaLqkpTg',
cf7e015f 1468 'ext': 'mp4',
545cc85d 1469 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1470 'description': 'md5:e03b909557865076822aa169218d6a5d',
1471 'duration': 10643,
1472 'upload_date': '20161111',
1473 'uploader': 'Team PGP',
1474 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1475 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1476 },
1477 }, {
1478 'info_dict': {
545cc85d 1479 'id': '3AKt1R1aDnw',
cf7e015f 1480 'ext': 'mp4',
545cc85d 1481 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1482 'description': 'md5:e03b909557865076822aa169218d6a5d',
1483 'duration': 10991,
1484 'upload_date': '20161111',
1485 'uploader': 'Team PGP',
1486 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1487 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1488 },
1489 }, {
1490 'info_dict': {
545cc85d 1491 'id': 'RtAMM00gpVc',
cf7e015f 1492 'ext': 'mp4',
545cc85d 1493 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1494 'description': 'md5:e03b909557865076822aa169218d6a5d',
1495 'duration': 10995,
1496 'upload_date': '20161111',
1497 'uploader': 'Team PGP',
1498 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1499 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1500 },
1501 }, {
1502 'info_dict': {
545cc85d 1503 'id': '6N2fdlP3C5U',
cf7e015f 1504 'ext': 'mp4',
545cc85d 1505 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1506 'description': 'md5:e03b909557865076822aa169218d6a5d',
1507 'duration': 10990,
1508 'upload_date': '20161111',
1509 'uploader': 'Team PGP',
1510 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1512 },
1513 }],
1514 'params': {
1515 'skip_download': True,
1516 },
65c2fde2 1517 'skip': 'Not multifeed anymore',
cbaed4bb 1518 },
f9f49d87 1519 {
067aa17e 1520 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1521 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1522 'info_dict': {
1523 'id': 'gVfLd0zydlo',
1524 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1525 },
1526 'playlist_count': 2,
be49068d 1527 'skip': 'Not multifeed anymore',
f9f49d87 1528 },
cbaed4bb 1529 {
2d3d2997 1530 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1531 'only_matching': True,
0e49d9a6 1532 },
6d4fc66b 1533 {
2d3d2997 1534 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1535 'only_matching': True,
1536 },
0e49d9a6 1537 {
067aa17e 1538 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1539 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1540 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1541 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1542 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1543 'info_dict': {
1544 'id': 'lsguqyKfVQg',
1545 'ext': 'mp4',
1546 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1547 'alt_title': 'Dark Walk',
0e49d9a6 1548 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1549 'duration': 133,
0e49d9a6
LL
1550 'upload_date': '20151119',
1551 'uploader_id': 'IronSoulElf',
ec85ded8 1552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1553 'uploader': 'IronSoulElf',
11f9be09 1554 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1555 'track': 'Dark Walk',
1556 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1557 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1558 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1559 'categories': ['Film & Animation'],
1560 'view_count': int,
1561 'live_status': 'not_live',
1562 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1564 'tags': 'count:13',
1565 'availability': 'public',
1566 'channel': 'IronSoulElf',
1567 'playable_in_embed': True,
1568 'like_count': int,
1569 'age_limit': 0,
6c73052c 1570 'channel_follower_count': int
0e49d9a6
LL
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 },
61f92af1 1576 {
067aa17e 1577 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1578 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1579 'only_matching': True,
1580 },
313dfc45
LL
1581 {
1582 # Video with yt:stretch=17:0
1583 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1584 'info_dict': {
1585 'id': 'Q39EVAstoRM',
1586 'ext': 'mp4',
1587 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1588 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1589 'upload_date': '20151107',
1590 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1591 'uploader': 'CH GAMER DROID',
1592 },
1593 'params': {
1594 'skip_download': True,
1595 },
be49068d 1596 'skip': 'This video does not exist.',
313dfc45 1597 },
201c1459 1598 {
1599 # Video with incomplete 'yt:stretch=16:'
1600 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1601 'only_matching': True,
1602 },
7caf9830
S
1603 {
1604 # Video licensed under Creative Commons
1605 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1606 'info_dict': {
1607 'id': 'M4gD1WSo5mA',
1608 'ext': 'mp4',
1609 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1610 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1611 'duration': 721,
17322130 1612 'upload_date': '20150128',
7caf9830 1613 'uploader_id': 'BerkmanCenter',
ec85ded8 1614 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1615 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1616 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1617 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1618 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1619 'like_count': int,
1620 'age_limit': 0,
1621 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1622 'channel': 'The Berkman Klein Center for Internet & Society',
1623 'availability': 'public',
1624 'view_count': int,
1625 'categories': ['Education'],
1626 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1627 'live_status': 'not_live',
1628 'playable_in_embed': True,
6c73052c 1629 'channel_follower_count': int
7caf9830
S
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
fd050249
S
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1643 'duration': 4060,
17322130 1644 'upload_date': '20151120',
eb6793ba 1645 'uploader': 'Bernie Sanders',
fd050249 1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1648 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
6c73052c 1661 'channel_follower_count': int
fd050249
S
1662 },
1663 'params': {
1664 'skip_download': True,
1665 },
1666 },
040ac686
S
1667 {
1668 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1669 'only_matching': True,
7f29cf54
S
1670 },
1671 {
067aa17e 1672 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1673 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1674 'only_matching': True,
6496ccb4
S
1675 },
1676 {
1677 # Rental video preview
1678 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1679 'info_dict': {
1680 'id': 'uGpuVWrhIzE',
1681 'ext': 'mp4',
1682 'title': 'Piku - Trailer',
1683 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1684 'upload_date': '20150811',
1685 'uploader': 'FlixMatrix',
1686 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1688 'license': 'Standard YouTube License',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
eb6793ba 1693 'skip': 'This video is not available.',
022a5d66 1694 },
12afdc2a
S
1695 {
1696 # YouTube Red video with episode data
1697 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1698 'info_dict': {
1699 'id': 'iqKdEhx-dD4',
1700 'ext': 'mp4',
1701 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1702 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1703 'duration': 2085,
12afdc2a
S
1704 'upload_date': '20170118',
1705 'uploader': 'Vsauce',
1706 'uploader_id': 'Vsauce',
1707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1708 'series': 'Mind Field',
1709 'season_number': 1,
1710 'episode_number': 1,
976ae3ea 1711 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1712 'tags': 'count:12',
1713 'view_count': int,
1714 'availability': 'public',
1715 'age_limit': 0,
1716 'channel': 'Vsauce',
1717 'episode': 'Episode 1',
1718 'categories': ['Entertainment'],
1719 'season': 'Season 1',
1720 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1721 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'like_count': int,
1723 'playable_in_embed': True,
1724 'live_status': 'not_live',
6c73052c 1725 'channel_follower_count': int
12afdc2a
S
1726 },
1727 'params': {
1728 'skip_download': True,
1729 },
1730 'expected_warnings': [
1731 'Skipping DASH manifest',
1732 ],
1733 },
c7121fa7
S
1734 {
1735 # The following content has been identified by the YouTube community
1736 # as inappropriate or offensive to some audiences.
1737 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1738 'info_dict': {
1739 'id': '6SJNVb0GnPI',
1740 'ext': 'mp4',
1741 'title': 'Race Differences in Intelligence',
1742 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1743 'duration': 965,
1744 'upload_date': '20140124',
1745 'uploader': 'New Century Foundation',
1746 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1747 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1748 },
1749 'params': {
1750 'skip_download': True,
1751 },
545cc85d 1752 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1753 },
022a5d66
S
1754 {
1755 # itag 212
1756 'url': '1t24XAntNCY',
1757 'only_matching': True,
fd5c4aab
S
1758 },
1759 {
1760 # geo restricted to JP
1761 'url': 'sJL6WA-aGkQ',
1762 'only_matching': True,
1763 },
cd5a74a2
S
1764 {
1765 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1766 'only_matching': True,
1767 },
bc2ca1bb 1768 {
1769 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1770 'only_matching': True,
1771 },
1772 {
1773 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1774 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1775 'only_matching': True,
1776 },
825cd268
RA
1777 {
1778 # DRM protected
1779 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1780 'only_matching': True,
4fe54c12
S
1781 },
1782 {
1783 # Video with unsupported adaptive stream type formats
1784 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1785 'info_dict': {
1786 'id': 'Z4Vy8R84T1U',
1787 'ext': 'mp4',
1788 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1789 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1790 'duration': 433,
1791 'upload_date': '20130923',
1792 'uploader': 'Amelia Putri Harwita',
1793 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1795 'formats': 'maxcount:10',
1796 },
1797 'params': {
1798 'skip_download': True,
1799 'youtube_include_dash_manifest': False,
1800 },
5429d6a9 1801 'skip': 'not actual anymore',
5caabd3c 1802 },
1803 {
822b9d9c 1804 # Youtube Music Auto-generated description
5caabd3c 1805 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1806 'info_dict': {
1807 'id': 'MgNrAu2pzNs',
1808 'ext': 'mp4',
1809 'title': 'Voyeur Girl',
1810 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1811 'upload_date': '20190312',
5429d6a9
S
1812 'uploader': 'Stephen - Topic',
1813 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1814 'artist': 'Stephen',
1815 'track': 'Voyeur Girl',
1816 'album': 'it\'s too much love to know my dear',
1817 'release_date': '20190313',
1818 'release_year': 2019,
976ae3ea 1819 'alt_title': 'Voyeur Girl',
1820 'view_count': int,
1821 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1822 'playable_in_embed': True,
1823 'like_count': int,
1824 'categories': ['Music'],
1825 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1826 'channel': 'Stephen',
1827 'availability': 'public',
1828 'creator': 'Stephen',
1829 'duration': 169,
1830 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1831 'age_limit': 0,
1832 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1833 'tags': 'count:11',
1834 'live_status': 'not_live',
6c73052c 1835 'channel_follower_count': int
5caabd3c 1836 },
1837 'params': {
1838 'skip_download': True,
1839 },
1840 },
66b48727
RA
1841 {
1842 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1843 'only_matching': True,
1844 },
011e75e6
S
1845 {
1846 # invalid -> valid video id redirection
1847 'url': 'DJztXj2GPfl',
1848 'info_dict': {
1849 'id': 'DJztXj2GPfk',
1850 'ext': 'mp4',
1851 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1852 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1853 'upload_date': '20090125',
1854 'uploader': 'Prochorowka',
1855 'uploader_id': 'Prochorowka',
1856 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1857 'artist': 'Panjabi MC',
1858 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1859 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1860 },
1861 'params': {
1862 'skip_download': True,
1863 },
545cc85d 1864 'skip': 'Video unavailable',
ea74e00b
DP
1865 },
1866 {
1867 # empty description results in an empty string
1868 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1869 'info_dict': {
1870 'id': 'x41yOUIvK2k',
1871 'ext': 'mp4',
1872 'title': 'IMG 3456',
1873 'description': '',
1874 'upload_date': '20170613',
1875 'uploader_id': 'ElevageOrVert',
1876 'uploader': 'ElevageOrVert',
976ae3ea 1877 'view_count': int,
1878 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1879 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1880 'like_count': int,
1881 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1882 'tags': [],
1883 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1884 'availability': 'public',
1885 'age_limit': 0,
1886 'categories': ['Pets & Animals'],
1887 'duration': 7,
1888 'playable_in_embed': True,
1889 'live_status': 'not_live',
1890 'channel': 'ElevageOrVert',
6c73052c 1891 'channel_follower_count': int
ea74e00b
DP
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 },
a0566bbf 1897 {
29f7c58a 1898 # with '};' inside yt initial data (see [1])
1899 # see [2] for an example with '};' inside ytInitialPlayerResponse
1900 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1901 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1902 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1903 'info_dict': {
1904 'id': 'CHqg6qOn4no',
1905 'ext': 'mp4',
1906 'title': 'Part 77 Sort a list of simple types in c#',
1907 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1908 'upload_date': '20130831',
1909 'uploader_id': 'kudvenkat',
1910 'uploader': 'kudvenkat',
976ae3ea 1911 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1912 'like_count': int,
1913 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1914 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1915 'live_status': 'not_live',
1916 'categories': ['Education'],
1917 'availability': 'public',
1918 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1919 'tags': 'count:12',
1920 'playable_in_embed': True,
1921 'age_limit': 0,
1922 'view_count': int,
1923 'duration': 522,
1924 'channel': 'kudvenkat',
6c73052c 1925 'channel_follower_count': int
a0566bbf 1926 },
1927 'params': {
1928 'skip_download': True,
1929 },
1930 },
29f7c58a 1931 {
1932 # another example of '};' in ytInitialData
1933 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1934 'only_matching': True,
1935 },
1936 {
1937 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1938 'only_matching': True,
1939 },
545cc85d 1940 {
cc2db878 1941 # https://github.com/ytdl-org/youtube-dl/pull/28094
1942 'url': 'OtqTfy26tG0',
1943 'info_dict': {
1944 'id': 'OtqTfy26tG0',
1945 'ext': 'mp4',
1946 'title': 'Burn Out',
1947 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1948 'upload_date': '20141120',
1949 'uploader': 'The Cinematic Orchestra - Topic',
1950 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1951 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1952 'artist': 'The Cinematic Orchestra',
1953 'track': 'Burn Out',
1954 'album': 'Every Day',
976ae3ea 1955 'like_count': int,
1956 'live_status': 'not_live',
1957 'alt_title': 'Burn Out',
1958 'duration': 614,
1959 'age_limit': 0,
1960 'view_count': int,
1961 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1962 'creator': 'The Cinematic Orchestra',
1963 'channel': 'The Cinematic Orchestra',
1964 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1965 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1966 'availability': 'public',
1967 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1968 'categories': ['Music'],
1969 'playable_in_embed': True,
6c73052c 1970 'channel_follower_count': int
cc2db878 1971 },
1972 'params': {
1973 'skip_download': True,
1974 },
545cc85d 1975 },
bc2ca1bb 1976 {
1977 # controversial video, only works with bpctr when authenticated with cookies
1978 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1979 'only_matching': True,
1980 },
a1a7907b 1981 {
1982 # controversial video, requires bpctr/contentCheckOk
1983 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1984 'info_dict': {
1985 'id': 'SZJvDhaSDnc',
1986 'ext': 'mp4',
1987 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1988 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1989 'uploader': 'CBS Mornings',
11f9be09 1990 'uploader_id': 'CBSThisMorning',
a1a7907b 1991 'upload_date': '20140716',
976ae3ea 1992 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1993 'duration': 170,
1994 'categories': ['News & Politics'],
1995 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1996 'view_count': int,
1997 'channel': 'CBS Mornings',
1998 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1999 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2000 'age_limit': 18,
2001 'availability': 'needs_auth',
2002 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2003 'like_count': int,
2004 'live_status': 'not_live',
2005 'playable_in_embed': True,
6c73052c 2006 'channel_follower_count': int
a1a7907b 2007 }
2008 },
f7ad7160 2009 {
2010 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2011 'url': 'cBvYw8_A0vQ',
2012 'info_dict': {
2013 'id': 'cBvYw8_A0vQ',
2014 'ext': 'mp4',
2015 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2016 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2017 'upload_date': '20201120',
2018 'uploader': 'Walk around Japan',
2019 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2021 'duration': 1456,
2022 'categories': ['Travel & Events'],
2023 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2024 'view_count': int,
2025 'channel': 'Walk around Japan',
2026 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2027 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2028 'age_limit': 0,
2029 'availability': 'public',
2030 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2031 'live_status': 'not_live',
2032 'playable_in_embed': True,
6c73052c 2033 'channel_follower_count': int
f7ad7160 2034 },
2035 'params': {
2036 'skip_download': True,
2037 },
0fb983f6 2038 }, {
2039 # Has multiple audio streams
2040 'url': 'WaOKSUlf4TM',
2041 'only_matching': True
9297939e 2042 }, {
2043 # Requires Premium: has format 141 when requested using YTM url
2044 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2045 'only_matching': True
2046 }, {
120916da 2047 # multiple subtitles with same lang_code
2048 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2049 'only_matching': True,
109dd3b2 2050 }, {
2051 # Force use android client fallback
2052 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2053 'info_dict': {
2054 'id': 'YOelRv7fMxY',
11f9be09 2055 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2056 'ext': '3gp',
2057 'upload_date': '20210624',
2058 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2059 'uploader': 'colinfurze',
11f9be09 2060 'uploader_id': 'colinfurze',
109dd3b2 2061 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2062 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2063 'duration': 596,
2064 'categories': ['Entertainment'],
2065 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2066 'view_count': int,
2067 'channel': 'colinfurze',
2068 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2069 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2070 'age_limit': 0,
2071 'availability': 'public',
2072 'like_count': int,
2073 'live_status': 'not_live',
2074 'playable_in_embed': True,
6c73052c 2075 'channel_follower_count': int
109dd3b2 2076 },
2077 'params': {
2078 'format': '17', # 3gp format available on android
2079 'extractor_args': {'youtube': {'player_client': ['android']}},
2080 },
120916da 2081 },
109dd3b2 2082 {
2083 # Skip download of additional client configs (remix client config in this case)
2084 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2085 'only_matching': True,
2086 'params': {
2087 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2088 },
8fc54b12 2089 }, {
2090 # shorts
2091 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2092 'only_matching': True,
9222c381 2093 }, {
2094 'note': 'Storyboards',
2095 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2096 'info_dict': {
2097 'id': '5KLPxDtMqe8',
2098 'ext': 'mhtml',
2099 'format_id': 'sb0',
2100 'title': 'Your Brain is Plastic',
2101 'uploader_id': 'scishow',
2102 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2103 'upload_date': '20140324',
2104 'uploader': 'SciShow',
976ae3ea 2105 'like_count': int,
2106 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2107 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2108 'view_count': int,
2109 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2110 'playable_in_embed': True,
2111 'tags': 'count:12',
2112 'uploader_url': 'http://www.youtube.com/user/scishow',
2113 'availability': 'public',
2114 'channel': 'SciShow',
2115 'live_status': 'not_live',
2116 'duration': 248,
2117 'categories': ['Education'],
2118 'age_limit': 0,
6c73052c 2119 'channel_follower_count': int
9222c381 2120 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2121 }, {
2122 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2123 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2124 'info_dict': {
2125 'id': '2NUZ8W2llS4',
2126 'ext': 'mp4',
2127 'title': 'The NP that test your phone performance 🙂',
2128 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2129 'uploader': 'Leon Nguyen',
2130 'uploader_id': 'VNSXIII',
2131 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2132 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2133 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2134 'duration': 21,
2135 'view_count': int,
2136 'age_limit': 0,
2137 'categories': ['Gaming'],
2138 'tags': 'count:23',
2139 'playable_in_embed': True,
2140 'live_status': 'not_live',
2141 'upload_date': '20220103',
2142 'like_count': int,
2143 'availability': 'public',
2144 'channel': 'Leon Nguyen',
2145 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2146 'channel_follower_count': int
2147 }
2148 }, {
2149 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2150 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2151 'info_dict': {
2152 'id': 'mzZzzBU6lrM',
2153 'ext': 'mp4',
2154 'title': 'I Met GeorgeNotFound In Real Life...',
2155 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2156 'uploader': 'Quackity',
2157 'uploader_id': 'QuackityHQ',
2158 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2159 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2160 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2161 'duration': 955,
2162 'view_count': int,
2163 'age_limit': 0,
2164 'categories': ['Entertainment'],
2165 'tags': 'count:26',
2166 'playable_in_embed': True,
2167 'live_status': 'not_live',
2168 'release_timestamp': 1641172509,
2169 'release_date': '20220103',
2170 'upload_date': '20220103',
2171 'like_count': int,
2172 'availability': 'public',
2173 'channel': 'Quackity',
2174 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2175 'channel_follower_count': int
2176 }
2177 },
2178 { # continuous livestream. Microformat upload date should be preferred.
2179 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2180 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2181 'info_dict': {
2182 'id': 'kgx4WGK0oNU',
2183 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2184 'ext': 'mp4',
2185 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2186 'availability': 'public',
2187 'age_limit': 0,
2188 'release_timestamp': 1637975704,
2189 'upload_date': '20210619',
2190 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2191 'live_status': 'is_live',
2192 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2193 'uploader': '阿鲍Abao',
2194 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2195 'channel': 'Abao in Tokyo',
2196 'channel_follower_count': int,
2197 'release_date': '20211127',
2198 'tags': 'count:39',
2199 'categories': ['People & Blogs'],
2200 'like_count': int,
2201 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2202 'view_count': int,
2203 'playable_in_embed': True,
2204 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2205 },
2206 'params': {'skip_download': True}
6e634cbe 2207 }, {
2208 # Story. Requires specific player params to work.
2209 # Note: stories get removed after some period of time
ee27297f 2210 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2211 'info_dict': {
ee27297f 2212 'id': 'vv8qTUWmulI',
6e634cbe 2213 'ext': 'mp4',
ee27297f 2214 'availability': 'unlisted',
2215 'view_count': int,
2216 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2217 'upload_date': '20220526',
2218 'categories': ['Education'],
2219 'title': 'Story',
2220 'channel': 'IT\'S HISTORY',
2221 'description': '',
2222 'uploader_id': 'BlastfromthePast',
2223 'duration': 12,
2224 'uploader': 'IT\'S HISTORY',
6e634cbe 2225 'playable_in_embed': True,
6e634cbe 2226 'age_limit': 0,
6e634cbe 2227 'live_status': 'not_live',
ee27297f 2228 'tags': [],
2229 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2230 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2231 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2232 }
2233 }, {
2234 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2235 'info_dict': {
2236 'id': 'tjjjtzRLHvA',
2237 'ext': 'mp4',
2238 'title': 'ハッシュタグ無し };if window.ytcsi',
2239 'upload_date': '20220323',
2240 'like_count': int,
2241 'availability': 'unlisted',
2242 'channel': 'nao20010128nao',
2243 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2244 'age_limit': 0,
2245 'uploader': 'nao20010128nao',
2246 'uploader_id': 'nao20010128nao',
2247 'categories': ['Music'],
6e634cbe 2248 'view_count': int,
2249 'description': '',
ee27297f 2250 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2251 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2252 'live_status': 'not_live',
2253 'playable_in_embed': True,
2254 'channel_follower_count': int,
2255 'duration': 6,
2256 'tags': [],
2257 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2258 }
2259 }
2eb88d95
PH
2260 ]
2261
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    A URL whose query string carries a non-empty first `list` value is
    rejected outright; every other URL is judged by the parent class.
    """
    from ..utils import parse_qs

    # Missing `list` parameter -> [None], whose first item is falsy.
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2270
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the empty `_code_cache` and `_player_cache` dicts."""
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2275
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Turn every format flagged ``is_from_start`` into a generator-backed live DASH format.

    Each such format dict is mutated in place: it is marked live, its protocol is set to
    'http_dash_segments_generator', and 'fragments' becomes a partial of
    _live_dash_fragments bound to the format id, live_start_time and a manifest feed
    callback. Formats without ``is_from_start`` are left untouched.
    """
    # Guards refetch_manifest so only one caller refreshes the shared state at a time
    lock = threading.Lock()

    # State shared (via nonlocal) between the closures below
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses and rebuild `formats`, unless the last refresh is at most `delay` seconds old"""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        # Restart the freshness window from this refresh
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            # The format is missing from the current list: either the stream
            # ended, or the refreshed list unexpectedly lacks this format
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2319
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generate fragment dicts (``{'url': ...}``) for a live DASH stream, from its start.

    @param format_id        id of the format whose fragments are generated
    @param live_start_time  start time of the live stream (may be falsy)
    @param mpd_feed         callable (format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' is read and 'last_error' is popped from it

    The generator ends once the stream is reported as no longer live, or once
    no_fragment_score exceeds 30.
    """
    # FETCH_SPAN: minimum seconds per polling round
    # MAX_DURATION: 432000 seconds = the 120 hours mentioned in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # _extract_sequence_from_mpd reads this as a free variable and may return it
    # before the loop below has assigned it (e.g. when the very first manifest
    # fetch yields no formats). Bind it up front so that path cannot raise NameError
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest URL through mpd_feed and, when needed, re-parse the MPD.

        @returns (should_continue, last sequence number)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a fresh manifest quickly when told to, or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a jump to the `continue` in the handler below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the rest of the FETCH_SPAN-second round
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2420
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Look up the player JS URL in the given ytcfg dicts.

    Returns the first match joined onto https://www.youtube.com, or None when
    nothing is found. `webpage` is accepted but not used here.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', found) if found else None
109dd3b2 2428
b6de707d 2429 def _download_player_url(self, video_id, fatal=False):
2430 res = self._download_webpage(
2431 'https://www.youtube.com/iframe_api',
2432 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2433 if res:
2434 player_version = self._search_regex(
2435 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2436 if player_version:
2437 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2438
60064c53
PH
2439 def _signature_cache_id(self, example_sig):
2440 """ Return a string representation of a signature """
78caa52a 2441 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 2442
e40c758c
S
2443 @classmethod
2444 def _extract_player_info(cls, player_url):
2445 for player_re in cls._PLAYER_INFO_RE:
2446 id_m = re.search(player_re, player_url)
2447 if id_m:
2448 break
2449 else:
c081b35c 2450 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2451 return id_m.group('id')
e40c758c 2452
404f611f 2453 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2454 player_id = self._extract_player_info(player_url)
2455 if player_id not in self._code_cache:
1276a43a 2456 code = self._download_webpage(
109dd3b2 2457 player_url, video_id, fatal=fatal,
2458 note='Downloading player ' + player_id,
2459 errnote='Download of %s failed' % player_url)
1276a43a 2460 if code:
2461 self._code_cache[player_id] = code
404f611f 2462 return self._code_cache.get(player_id)
109dd3b2 2463
e40c758c 2464 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2465 player_id = self._extract_player_info(player_url)
e0df6211 2466
c4417ddb 2467 # Read from filesystem cache
86e5f3ed 2468 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2469 assert os.path.basename(func_id) == func_id
a0e07d31 2470
69ea8ca4 2471 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 2472 if cache_spec is not None:
78caa52a 2473 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2474
404f611f 2475 code = self._load_player(video_id, player_url)
2476 if code:
109dd3b2 2477 res = self._parse_sig_js(code)
e0df6211 2478
109dd3b2 2479 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2480 cache_res = res(test_string)
2481 cache_spec = [ord(c) for c in cache_res]
83799698 2482
109dd3b2 2483 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
2484 return res
83799698 2485
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set. The function
    is probed with a string whose i-th character has code point i, and the
    resulting index sequence is written out as a sum of index and slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield 's[...]' expressions covering `idxs`, merging runs that step by +1 or -1 into slices"""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # An exclusive bound below zero cannot be written as a number, so it is left open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        # step is None while no run is in progress
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev; emit it as one slice
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is left after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2527
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is taken from the 'sig' group of the first pattern below
    that matches `jscode`. The returned callable takes one string and passes it
    to the interpreted JS function as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy could never match anything new
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2551
545cc85d 2552 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2553 """Turn the encrypted s field into a working signature"""
c8bf86d5 2554 try:
62af3a0e 2555 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5 2556 if player_id not in self._player_cache:
52023f12 2557 func = self._extract_signature_function(video_id, player_url, s)
c8bf86d5
PH
2558 self._player_cache[player_id] = func
2559 func = self._player_cache[player_id]
404f611f 2560 self._print_sig_code(func, s)
c8bf86d5
PH
2561 return func(s)
2562 except Exception as e:
52023f12 2563 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2564
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Results are cached per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted function is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        decrypted = self._player_cache[value_key]
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2585
2586 def _extract_n_function_name(self, jscode):
48416bc4 2587 nfunc, idx = self._search_regex(
c571b3a6 2588 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
48416bc4 2589 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2590 if not idx:
2591 return nfunc
2592 return json.loads(js_to_json(self._search_regex(
a7d4acc0 2593 rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
48416bc4 2594 f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
404f611f 2595
def _extract_n_function(self, video_id, player_url):
    """
    Return a callable that applies the player's "n" function to a single string.

    The function code comes from the 'youtube-nsig' cache when available;
    otherwise it is extracted from the player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2613
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is read from ytcfg['STS'] first; if that gives nothing, it is
    searched for in the player JS. Without a player_url this raises
    ExtractorError when `fatal` is set, and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2637
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URL from the player responses so the video is marked as watched.

    Warns and returns when no such URL is available; a failed request is not fatal.
    """
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2663
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Collect YouTube embeds found in `webpage`.

    Returns a list mixing HTML-unescaped embed URLs (iframe/embed/object/SWFObject
    style) with bare values taken from lazyYT ``data-youtube-id`` attributes
    and from "YouTube Video Importer" ``data-video_id`` attributes.
    """
    # Embedded YouTube player
    # The embedSWF alternative accepts the call form `embedSWF(` and, for
    # backward compatibility, the spelling the previous pattern matched
    # (an optional "(" followed by ":")
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(\s*|\(?:\s*)|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple (q1, q2, video id); only the id is kept
    entries.extend(m[-1] for m in matches)

    return entries
2695
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None when there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2700
97665381
PH
2701 @classmethod
2702 def extract_id(cls, url):
2703 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
c5e8d7af 2704 if mobj is None:
69ea8ca4 2705 raise ExtractorError('Invalid URL: %s' % url)
5ad28e7f 2706 return mobj.group('id')
c5e8d7af 2707
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar renderer found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)
2722
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro-markers lists in the engagement panels of `data`.

    Returns the first non-empty chapter list, or [] when no panel yields one.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2735
1890fc63 2736 def _extract_chapters_from_description(self, description, duration):
2737 return self._extract_chapters(
2738 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2739 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2740 duration=duration, strict=False)
84213ea8 2741
1890fc63 2742 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2743 if not duration:
2744 return
2745 chapter_list = [{
2746 'start_time': chapter_time(chapter),
2747 'title': chapter_title(chapter),
2748 } for chapter in chapter_list or []]
2749 if not strict:
2750 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2751
2752 chapters = [{'start_time': 0, 'title': '<Untitled>'}]
2753 for idx, chapter in enumerate(chapter_list):
2754 if chapter['start_time'] is None or not chapter['title']:
2755 self.report_warning(f'Incomplete chapter {idx}')
2756 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2757 chapters[-1]['end_time'] = chapter['start_time']
2758 chapters.append(chapter)
2759 else:
2760 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2761 chapters[-1]['end_time'] = duration
2762 return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
84213ea8 2763
a1c5d2ca
M
2764 def _extract_comment(self, comment_renderer, parent=None):
2765 comment_id = comment_renderer.get('commentId')
2766 if not comment_id:
2767 return
fe93e2c4 2768
052e1350 2769 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2770
49bd8c66 2771 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2772 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2773 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2774 author_id = try_get(comment_renderer,
2775 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2776
49bd8c66 2777 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2778 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2779 author_thumbnail = try_get(comment_renderer,
2780 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2781
2782 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2783 is_favorited = 'creatorHeart' in (try_get(
2784 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2785 return {
2786 'id': comment_id,
2787 'text': text,
d92f5d5a 2788 'timestamp': timestamp,
a1c5d2ca
M
2789 'time_text': time_text,
2790 'like_count': votes,
97524332 2791 'is_favorited': is_favorited,
a1c5d2ca
M
2792 'author': author,
2793 'author_id': author_id,
2794 'author_thumbnail': author_thumbnail,
2795 'author_is_uploader': author_is_uploader,
2796 'parent': parent or 'root'
2797 }
2798
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator yielding comment dicts, following API continuations page by page.

    @param root_continuation_data  renderer dict the first continuation is taken from
    @param ytcfg                   passed on to the API header/request helpers
    @param video_id                used to build a continuation when none is found, and in warnings
    @param parent                  id of the parent comment when fetching replies; None for the top level
    @param tracker                 dict of running counters shared across recursive calls;
                                   created here when not given
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the estimated comment count and return the continuation for the selected sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment in `contents`, followed by its replies; yields None once the parent limit is reached"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Up to four limits are read from the max_comments argument; the padding
    # makes every missing or unparsable one fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # The key check is only skipped for the first request of a forced continuation
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first top-level section is treated as the header; it only supplies the sorted continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message when a top-level call produced no comments at all
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
2944
2945 @staticmethod
2946 def _generate_comment_continuation(video_id):
2947 """
2948 Generates initial comment section continuation token from given video id
2949 """
2950 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
2951 return base64.b64encode(token.encode()).decode()
2952
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section (None if there is none)
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # Only the first value of the max_comments argument caps the total here
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
a1c5d2ca 2963
109dd3b2 2964 @staticmethod
99e9e001 2965 def _get_checkok_params():
2966 return {'contentCheckOk': True, 'racyCheckOk': True}
2967
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback-context part of a player API query.

    'signatureTimestamp' is included only when `sts` is not None; the entries
    from _get_checkok_params() are merged in at the top level.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {'playbackContext': {'contentPlaybackContext': playback}}
    player_context.update(cls._get_checkok_params())
    return player_context
2981
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether `player_response` indicates an age gate.

    True when playabilityStatus has a truthy 'desktopLegacyAgeGateReason', or
    when its 'status' or 'reason' contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
2993
@staticmethod
def _is_unplayable(player_response):
    """Return True when the response's playabilityStatus status is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2997
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Request the 'player' API endpoint for `video_id` as the given client.

    Returns the response, or None when it is falsy. The signature timestamp is
    only looked up when a player_url is available.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3017
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the 'player_client' extractor argument into an ordered, de-duplicated list of client names.

    'default' expands to android and web, 'all' to every client whose name does
    not start with an underscore (highest priority first); unknown names are
    skipped with a warning. Nothing usable requested means the defaults. For
    music URLs the existing ``<client>_music`` variants are appended.
    """
    default = ['android', 'web']
    public_clients = (name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_')
    allowed_clients = sorted(
        public_clients, key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
            continue
        if client == 'default':
            requested_clients.extend(default)
            continue
        if client == 'all':
            requested_clients.extend(allowed_clients)
            continue
        self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The generator walks the very list being extended, exactly as before
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 3041
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    @param clients       List of client names to try, in order
    @param video_id      ID of the video
    @param webpage       Watch page HTML, or a falsy value if unavailable
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback
    @returns             Tuple (list of player responses, player URL)
    @raises              The last ExtractorError seen, if no player
                         response at all could be collected
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that pop() hands out the clients in the requested order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in seen_clients:
                pending.append(candidate)
                seen_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        is_web = client == 'web'

        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player, player_url = False, None

        # The iframe fallback is attempted at most once per call
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as err:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url
11f9be09 3116
def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate format dicts from the streaming data of all player responses.

    First yields one dict per usable entry found under 'formats' and
    'adaptiveFormats' (signature and "n" parameters are decrypted with the
    player at player_url where needed), then yields the formats found in the
    HLS and DASH manifests referenced by each streaming data dict, unless
    those manifests are skipped by configuration.

    @param streaming_data  List of 'streamingData' dicts
    @param video_id        ID of the video
    @param player_url      Player URL used for signature/nsig decryption;
                           ciphered formats are skipped when it is falsy
    @param is_live         Whether the video is currently live
    @param duration        Video duration used to detect formats whose
                           'approxDurationMs' is suspiciously short
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for the key
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality can still be None here when the format carries
                # neither 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so the appends below never touch the list object
    # handed out by _configuration_arg
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; False means "duplicate, drop it" """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the same itag, then for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 3292
def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' storyboard format per level of the storyboard spec.

    The spec string (storyboards -> playerStoryboardSpecRenderer -> spec) is
    '|'-separated: the first field is the base URL template, each following
    field describes one storyboard level as 8 '#'-separated values. Levels
    that do not have exactly 8 values, or whose first five values are not
    all non-zero integers, are reported with a warning and skipped.

    @param player_responses  Player responses to search for the spec
    @param duration          Video duration, or None if unknown; used to
                             compute the duration of each fragment
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the leading base URL field
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        # Without a known duration the division would raise TypeError, so
        # the fragment durations are left unset (None) instead
        fragment_duration = None if duration is None else duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': (
                    None if fragment_duration is None
                    else min(fragment_duration, duration - (j * fragment_duration))),
            } for j in range(math.ceil(fragment_count))],
        }
3327
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    @param url            Original URL, used to select the clients
    @param smuggled_data  Smuggled data dict, used to select the clients
    @param video_id       ID of the video
    @param webpage_url    Watch page URL to download
    @returns              Tuple (webpage, master_ytcfg, player_responses,
                          player_url); webpage is None when 'webpage' is
                          listed in the 'player_skip' configuration argument
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, responses, player_url
3341
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and build the list of formats.

    @param video_id          ID of the video
    @param microformats      Microformat dicts; searched for
                             'liveBroadcastDetails'
    @param video_details     Video details dicts; searched for 'isLive'
    @param player_responses  Player responses; searched for 'streamingData'
    @param player_url        Player URL forwarded to _extract_formats
    @param duration          Video duration forwarded to _extract_formats
    @returns                 Tuple (live_broadcast_details, is_live,
                             streaming_data, formats)
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        # The video details did not say; ask the broadcast details instead
        live_now = get_first(broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = [*self._extract_formats(streaming_data, video_id, player_url, live_now, duration)]

    return broadcast_details, live_now, streaming_data, formats
3352
3353 def _real_extract(self, url):
3354 url, smuggled_data = unsmuggle_url(url, {})
3355 video_id = self._match_id(url)
3356
3357 base_url = self.http_scheme() + '//www.youtube.com/'
3358 webpage_url = base_url + 'watch?v=' + video_id
3359
3360 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3361
11f9be09 3362 playability_statuses = traverse_obj(
3363 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3364
3365 trailer_video_id = get_first(
3366 playability_statuses,
3367 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3368 expected_type=str)
3369 if trailer_video_id:
3370 return self.url_result(
3371 trailer_video_id, self.ie_key(), trailer_video_id)
3372
3373 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3374 if webpage else (lambda x: None))
3375
3376 video_details = traverse_obj(
3377 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3378 microformats = traverse_obj(
3379 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3380 expected_type=dict, default=[])
3381 video_title = (
3382 get_first(video_details, 'title')
3383 or self._get_text(microformats, (..., 'title'))
3384 or search_meta(['og:title', 'twitter:title', 'title']))
3385 video_description = get_first(video_details, 'shortDescription')
3386
d89257f3 3387 multifeed_metadata_list = get_first(
3388 player_responses,
3389 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3390 expected_type=str)
3391 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3392 if self.get_param('noplaylist'):
11f9be09 3393 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3394 else:
3395 entries = []
3396 feed_ids = []
3397 for feed in multifeed_metadata_list.split(','):
3398 # Unquote should take place before split on comma (,) since textual
3399 # fields may contain comma as well (see
3400 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3401 feed_data = compat_parse_qs(
3402 compat_urllib_parse_unquote_plus(feed))
3403
3404 def feed_entry(name):
3405 return try_get(
3406 feed_data, lambda x: x[name][0], compat_str)
3407
3408 feed_id = feed_entry('id')
3409 if not feed_id:
3410 continue
3411 feed_title = feed_entry('title')
3412 title = video_title
3413 if feed_title:
3414 title += ' (%s)' % feed_title
3415 entries.append({
3416 '_type': 'url_transparent',
3417 'ie_key': 'Youtube',
3418 'url': smuggle_url(
3419 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3420 {'force_singlefeed': True}),
3421 'title': title,
3422 })
3423 feed_ids.append(feed_id)
3424 self.to_screen(
3425 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3426 % (', '.join(feed_ids), video_id))
3427 return self.playlist_result(
3428 entries, video_id, video_title, video_description)
11f9be09 3429
a1b2d843 3430 duration = int_or_none(
3431 get_first(video_details, 'lengthSeconds')
3432 or get_first(microformats, 'lengthSeconds')
3433 or parse_duration(search_meta('duration'))) or None
3434
829bbd1d 3435 if get_first(video_details, 'isPostLiveDvr'):
3436 self.write_debug('Video is in Post-Live Manifestless mode')
3437 if duration or 0 > 4 * 3600:
3438 self.report_warning(
3439 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3440 'This is a known issue and patches are welcome')
3441
a1b2d843 3442 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
3443 video_id, microformats, video_details, player_responses, player_url, duration)
bf1317d2 3444
545cc85d 3445 if not formats:
11f9be09 3446 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3447 self.report_drm(video_id)
11f9be09 3448 pemr = get_first(
3449 playability_statuses,
3450 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3451 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3452 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3453 if subreason:
545cc85d 3454 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3455 countries = get_first(microformats, 'availableCountries')
545cc85d 3456 if not countries:
3457 regions_allowed = search_meta('regionsAllowed')
3458 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3459 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3460 reason += f'. {subreason}'
545cc85d 3461 if reason:
b7da73eb 3462 self.raise_no_formats(reason, expected=True)
bf1317d2 3463
11f9be09 3464 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3465 if not keywords and webpage:
3466 keywords = [
3467 unescapeHTML(m.group('content'))
3468 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3469 for keyword in keywords:
3470 if keyword.startswith('yt:stretch='):
201c1459 3471 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3472 if mobj:
3473 # NB: float is intentional for forcing float division
3474 w, h = (float(v) for v in mobj.groups())
3475 if w > 0 and h > 0:
3476 ratio = w / h
3477 for f in formats:
3478 if f.get('vcodec') != 'none':
3479 f['stretched_ratio'] = ratio
3480 break
a709d873 3481 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3482 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3483 if thumbnail_url:
3484 thumbnails.append({
3485 'url': thumbnail_url,
ff2751ac 3486 })
fccf5021 3487 original_thumbnails = thumbnails.copy()
3488
0ba692ac 3489 # The best resolution thumbnails sometimes do not appear in the webpage
bfec31be 3490 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3491 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3492 thumbnail_names = [
bfec31be 3493 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3494 # in resolution, these are not the custom thumbnail. So de-prioritize them
3495 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3496 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3497 ]
cca80fe6 3498 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3499 thumbnails.extend({
3500 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3501 video_id=video_id, name=name, ext=ext,
3502 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3503 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3504 for thumb in thumbnails:
cca80fe6 3505 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3506 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3507 self._remove_duplicate_formats(thumbnails)
fccf5021 3508 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3509
7ea65411 3510 category = get_first(microformats, 'category') or search_meta('genre')
3511 channel_id = str_or_none(
3512 get_first(video_details, 'channelId')
3513 or get_first(microformats, 'externalChannelId')
3514 or search_meta('channelId'))
7ea65411 3515 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3516
3517 live_content = get_first(video_details, 'isLiveContent')
3518 is_upcoming = get_first(video_details, 'isUpcoming')
3519 if is_live is None:
3520 if is_upcoming or live_content is False:
3521 is_live = False
3522 if is_upcoming is None and (live_content or is_live):
3523 is_upcoming = False
adbc4ec4
THD
3524 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3525 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3526 if not duration and live_end_time and live_start_time:
3527 duration = live_end_time - live_start_time
3528
3529 if is_live and self.get_param('live_from_start'):
3530 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3531
720c3099 3532 formats.extend(self._extract_storyboard(player_responses, duration))
3533
3534 # Source is given priority since formats that throttle are given lower source_preference
3535 # When throttling issue is fully fixed, remove this
3536 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3537
545cc85d 3538 info = {
3539 'id': video_id,
39ca3b5c 3540 'title': video_title,
545cc85d 3541 'formats': formats,
3542 'thumbnails': thumbnails,
fccf5021 3543 # The best thumbnail that we are sure exists. Prevents unnecessary
3544 # URL checking if the user doesn't care about getting the best possible thumbnail
3545 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3546 'description': video_description,
11f9be09 3547 'uploader': get_first(video_details, 'author'),
545cc85d 3548 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3549 'uploader_url': owner_profile_url,
3550 'channel_id': channel_id,
e0ddbd02 3551 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
545cc85d 3552 'duration': duration,
3553 'view_count': int_or_none(
11f9be09 3554 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3555 or search_meta('interactionCount')),
11f9be09 3556 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3557 'age_limit': 18 if (
11f9be09 3558 get_first(microformats, 'isFamilySafe') is False
545cc85d 3559 or search_meta('isFamilyFriendly') == 'false'
3560 or search_meta('og:restrictions:age') == '18+') else 0,
3561 'webpage_url': webpage_url,
3562 'categories': [category] if category else None,
3563 'tags': keywords,
11f9be09 3564 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3565 'is_live': is_live,
3566 'was_live': (False if is_live or is_upcoming or live_content is False
3567 else None if is_live is None or is_upcoming is None
3568 else live_content),
3569 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3570 'release_timestamp': live_start_time,
545cc85d 3571 }
b477fc13 3572
3944e7af 3573 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3574 if pctr:
ecdc9049 3575 def get_lang_code(track):
3576 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3577 or track.get('languageCode'))
3578
3579 # Converted into dicts to remove duplicates
3580 captions = {
3581 get_lang_code(sub): sub
3582 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3583 translation_languages = {
3584 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3585 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3586
774d79cc 3587 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3588 lang_subs = container.setdefault(lang_code, [])
545cc85d 3589 for fmt in self._SUBTITLE_FORMATS:
3590 query.update({
3591 'fmt': fmt,
3592 })
3593 lang_subs.append({
3594 'ext': fmt,
60f393e4 3595 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3596 'name': sub_name,
545cc85d 3597 })
7e72694b 3598
ecdc9049 3599 subtitles, automatic_captions = {}, {}
3600 for lang_code, caption_track in captions.items():
3601 base_url = caption_track.get('baseUrl')
1235d333 3602 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3603 if not base_url:
3604 continue
ecdc9049 3605 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3606 if caption_track.get('kind') != 'asr':
545cc85d 3607 if not lang_code:
3608 continue
3609 process_language(
ecdc9049 3610 subtitles, base_url, lang_code, lang_name, {})
3611 if not caption_track.get('isTranslatable'):
3612 continue
3944e7af 3613 for trans_code, trans_name in translation_languages.items():
3614 if not trans_code:
545cc85d 3615 continue
1235d333 3616 orig_trans_code = trans_code
ecdc9049 3617 if caption_track.get('kind') != 'asr':
18e49408 3618 if 'translated_subs' in self._configuration_arg('skip'):
3619 continue
ecdc9049 3620 trans_code += f'-{lang_code}'
3621 trans_name += format_field(lang_name, template=' from %s')
d49669ac 3622 # Add an "-orig" label to the original language so that it can be distinguished.
3623 # The subs are returned without "-orig" as well for compatibility
1235d333 3624 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3625 process_language(
d49669ac 3626 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3627 # Setting tlang=lang returns damaged subtitles.
d49669ac 3628 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3629 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
ecdc9049 3630 info['automatic_captions'] = automatic_captions
3631 info['subtitles'] = subtitles
7e72694b 3632
545cc85d 3633 parsed_url = compat_urllib_parse_urlparse(url)
3634 for component in [parsed_url.fragment, parsed_url.query]:
3635 query = compat_parse_qs(component)
3636 for k, v in query.items():
3637 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3638 d_k += '_time'
3639 if d_k not in info and k in s_ks:
3640 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3641
3642 # Youtube Music Auto-generated description
822b9d9c 3643 if video_description:
1890fc63 3644 mobj = re.search(
3645 r'''(?xs)
3646 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3647 (?P<album>[^\n]+)
3648 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3649 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3650 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3651 .+\nAuto-generated\ by\ YouTube\.\s*$
3652 ''', video_description)
822b9d9c 3653 if mobj:
822b9d9c
RA
3654 release_year = mobj.group('release_year')
3655 release_date = mobj.group('release_date')
3656 if release_date:
3657 release_date = release_date.replace('-', '')
3658 if not release_year:
545cc85d 3659 release_year = release_date[:4]
3660 info.update({
3661 'album': mobj.group('album'.strip()),
3662 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3663 'track': mobj.group('track').strip(),
3664 'release_date': release_date,
cc2db878 3665 'release_year': int_or_none(release_year),
545cc85d 3666 })
7e72694b 3667
545cc85d 3668 initial_data = None
3669 if webpage:
b7c47b74 3670 initial_data = self._search_json(
3671 self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', video_id, fatal=False)
545cc85d 3672 if not initial_data:
99e9e001 3673 query = {'videoId': video_id}
3674 query.update(self._get_checkok_params())
109dd3b2 3675 initial_data = self._extract_response(
3676 item_id=video_id, ep='next', fatal=False,
99e9e001 3677 ytcfg=master_ytcfg, query=query,
3678 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3679 note='Downloading initial data API JSON')
545cc85d 3680
19a03940 3681 try: # This will error if there is no livechat
c60ee3a2 3682 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3683 except (KeyError, IndexError, TypeError):
3684 pass
3685 else:
ecdc9049 3686 info.setdefault('subtitles', {})['live_chat'] = [{
19a03940 3687 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
c60ee3a2 3688 'video_id': video_id,
3689 'ext': 'json',
f6745c49 3690 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3691 }]
545cc85d 3692
3693 if initial_data:
7c365c21 3694 info['chapters'] = (
3695 self._extract_chapters_from_json(initial_data, duration)
3696 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3697 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3698 or None)
545cc85d 3699
17322130 3700 contents = traverse_obj(
3701 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3702 expected_type=list, default=[])
3703
3704 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3705 if vpir:
3706 stl = vpir.get('superTitleLink')
3707 if stl:
3708 stl = self._get_text(stl)
3709 if try_get(
3710 vpir,
3711 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3712 info['location'] = stl
3713 else:
affc4fef 3714 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3715 if mobj:
545cc85d 3716 info.update({
17322130 3717 'series': mobj.group(1),
3718 'season_number': int(mobj.group(2)),
3719 'episode_number': int(mobj.group(3)),
545cc85d 3720 })
17322130 3721 for tlb in (try_get(
3722 vpir,
3723 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3724 list) or []):
3725 tbr = tlb.get('toggleButtonRenderer') or {}
3726 for getter, regex in [(
3727 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3728 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3729 lambda x: x['accessibility'],
3730 lambda x: x['accessibilityData']['accessibilityData'],
3731 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3732 label = (try_get(tbr, getter, dict) or {}).get('label')
3733 if label:
3734 mobj = re.match(regex, label)
3735 if mobj:
3736 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3737 break
17322130 3738 sbr_tooltip = try_get(
3739 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3740 if sbr_tooltip:
3741 like_count, dislike_count = sbr_tooltip.split(' / ')
3742 info.update({
3743 'like_count': str_to_int(like_count),
3744 'dislike_count': str_to_int(dislike_count),
3745 })
3746 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3747 if vsir:
3748 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3749 info.update({
3750 'channel': self._get_text(vor, 'title'),
3751 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3752
3753 rows = try_get(
3754 vsir,
3755 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3756 list) or []
3757 multiple_songs = False
3758 for row in rows:
3759 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3760 multiple_songs = True
3761 break
3762 for row in rows:
3763 mrr = row.get('metadataRowRenderer') or {}
3764 mrr_title = mrr.get('title')
3765 if not mrr_title:
3766 continue
3767 mrr_title = self._get_text(mrr, 'title')
3768 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3769 if mrr_title == 'License':
3770 info['license'] = mrr_contents_text
3771 elif not multiple_songs:
3772 if mrr_title == 'Album':
3773 info['album'] = mrr_contents_text
3774 elif mrr_title == 'Artist':
3775 info['artist'] = mrr_contents_text
3776 elif mrr_title == 'Song':
3777 info['track'] = mrr_contents_text
545cc85d 3778
3779 fallbacks = {
3780 'channel': 'uploader',
3781 'channel_id': 'uploader_id',
3782 'channel_url': 'uploader_url',
3783 }
992f9a73 3784
17322130 3785 # The upload date for scheduled, live and past live streams / premieres in microformats
3786 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3787 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3788 upload_date = (
3789 unified_strdate(get_first(microformats, 'uploadDate'))
3790 or unified_strdate(search_meta('uploadDate')))
3791 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3792 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3793 info['upload_date'] = upload_date
992f9a73 3794
545cc85d 3795 for to, frm in fallbacks.items():
3796 if not info.get(to):
3797 info[to] = info.get(frm)
3798
3799 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3800 v = info.get(s_k)
3801 if v:
3802 info[d_k] = v
b84071c0 3803
11f9be09 3804 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3805 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3806 is_membersonly = None
b28f8d24 3807 is_premium = None
c224251a
M
3808 if initial_data and is_private is not None:
3809 is_membersonly = False
b28f8d24 3810 is_premium = False
47193e02 3811 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3812 badge_labels = set()
3813 for content in contents:
3814 if not isinstance(content, dict):
3815 continue
3816 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3817 for badge_label in badge_labels:
3818 if badge_label.lower() == 'members only':
3819 is_membersonly = True
3820 elif badge_label.lower() == 'premium':
3821 is_premium = True
3822 elif badge_label.lower() == 'unlisted':
3823 is_unlisted = True
c224251a 3824
c224251a
M
3825 info['availability'] = self._availability(
3826 is_private=is_private,
b28f8d24 3827 needs_premium=is_premium,
c224251a
M
3828 needs_subscription=is_membersonly,
3829 needs_auth=info['age_limit'] >= 18,
3830 is_unlisted=None if is_private is None else is_unlisted)
3831
a2160aa4 3832 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3833
11f9be09 3834 self.mark_watched(video_id, player_responses)
d77ab8e2 3835
545cc85d 3836 return info
c5e8d7af 3837
a61fd4cf 3838
a6213a49 3839class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3840
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is passed through.

    The returned wrapper takes ``(self, url)``, unsmuggles ``url`` and calls
    ``func(self, url, smuggled_data)``. If there is any smuggled data and the
    result has entries, every entry URL is smuggled again with that data.
    """
    def _resmuggle(playlist_entries, payload):
        # A generator, so the wrapped entries are still consumed lazily.
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in playlist_entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, payload = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            payload['is_music_url'] = True
        result = func(self, plain_url, payload)
        if payload and result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], payload)
        return result

    return wrapper
3860
a6213a49 3861 def _extract_channel_id(self, webpage):
3862 channel_id = self._html_search_meta(
3863 'channelId', webpage, 'channel id', default=None)
3864 if channel_id:
3865 return channel_id
3866 channel_url = self._html_search_meta(
3867 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3868 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3869 'twitter:app:url:googleplay'), webpage, 'channel url')
3870 return self._search_regex(
3871 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3872 channel_url, 'channel id')
15f6397c 3873
8bdd16b4 3874 @staticmethod
cd7c66cf 3875 def _extract_basic_item_renderer(item):
3876 # Modified from _extract_grid_item_renderer
201c1459 3877 known_basic_renderers = (
a17526e4 3878 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 3879 )
3880 for key, renderer in item.items():
201c1459 3881 if not isinstance(renderer, dict):
cd7c66cf 3882 continue
201c1459 3883 elif key in known_basic_renderers:
3884 return renderer
3885 elif key.startswith('grid') and key.endswith('Renderer'):
3886 return renderer
8bdd16b4 3887
8bdd16b4 3888 def _grid_entries(self, grid_renderer):
3889 for item in grid_renderer['items']:
3890 if not isinstance(item, dict):
39b62db1 3891 continue
cd7c66cf 3892 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3893 if not isinstance(renderer, dict):
3894 continue
052e1350 3895 title = self._get_text(renderer, 'title')
fe93e2c4 3896
8bdd16b4 3897 # playlist
3898 playlist_id = renderer.get('playlistId')
3899 if playlist_id:
3900 yield self.url_result(
3901 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3902 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3903 video_title=title)
201c1459 3904 continue
8bdd16b4 3905 # video
3906 video_id = renderer.get('videoId')
3907 if video_id:
3908 yield self._extract_video(renderer)
201c1459 3909 continue
8bdd16b4 3910 # channel
3911 channel_id = renderer.get('channelId')
3912 if channel_id:
8bdd16b4 3913 yield self.url_result(
3914 'https://www.youtube.com/channel/%s' % channel_id,
3915 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3916 continue
3917 # generic endpoint URL support
3918 ep_url = urljoin('https://www.youtube.com/', try_get(
3919 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3920 compat_str))
3921 if ep_url:
3922 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3923 if ie.suitable(ep_url):
3924 yield self.url_result(
3925 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3926 break
8bdd16b4 3927
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a responsive list item.

    Checked in order: the item's own video ID, a watch endpoint with a
    playlist ID (optionally combined with a video ID), and finally a browse
    endpoint ID. Returns None when none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
3945
3d3dddc9 3946 def _shelf_entries_from_content(self, shelf_renderer):
3947 content = shelf_renderer.get('content')
3948 if not isinstance(content, dict):
8bdd16b4 3949 return
cd7c66cf 3950 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3951 if renderer:
3952 # TODO: add support for nested playlists so each shelf is processed
3953 # as separate playlist
3954 # TODO: this includes only first N items
86e5f3ed 3955 yield from self._grid_entries(renderer)
3d3dddc9 3956 renderer = content.get('horizontalListRenderer')
3957 if renderer:
3958 # TODO
3959 pass
8bdd16b4 3960
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, then its embedded entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3976
8bdd16b4 3977 def _playlist_entries(self, video_list_renderer):
3978 for content in video_list_renderer['contents']:
3979 if not isinstance(content, dict):
3980 continue
3981 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3982 if not isinstance(renderer, dict):
3983 continue
3984 video_id = renderer.get('videoId')
3985 if not video_id:
3986 continue
3987 yield self._extract_video(renderer)
07aeced6 3988
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3996
8bdd16b4 3997 def _video_entry(self, video_renderer):
3998 video_id = video_renderer.get('videoId')
3999 if video_id:
4000 return self._extract_video(video_renderer)
dacb3a86 4001
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4008
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every YouTube
    video linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4044
8bdd16b4 4045 def _post_thread_continuation_entries(self, post_thread_continuation):
4046 contents = post_thread_continuation.get('contents')
4047 if not isinstance(contents, list):
4048 return
4049 for content in contents:
4050 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4051 if isinstance(renderer, dict):
4052 yield from self._post_thread_entries(renderer)
8bdd16b4 4053 continue
6b0b0a28 4054 renderer = content.get('videoRenderer')
4055 if isinstance(renderer, dict):
4056 yield self._video_entry(renderer)
07aeced6 4057
39ed931e 4058 r''' # unused
4059 def _rich_grid_entries(self, contents):
4060 for content in contents:
4061 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4062 if video_renderer:
4063 entry = self._video_entry(video_renderer)
4064 if entry:
4065 yield entry
4066 '''
52efa4b3 4067
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset in place to
    ``[None]`` and its single slot then receives whatever
    ``_extract_continuation`` returns for the innermost renderer that has
    one (item renderer, then section renderer, then the parent renderer).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4116
4117 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4118 continuation_list = [None]
4119 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4120 tab_content = try_get(tab, lambda x: x['content'], dict)
4121 if not tab_content:
4122 return
3462ffa8 4123 parent_renderer = (
29f7c58a 4124 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4125 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4126 yield from extract_entries(parent_renderer)
3462ffa8 4127 continuation = continuation_list[0]
d069eca7 4128
8bdd16b4 4129 for page_num in itertools.count(1):
4130 if not continuation:
4131 break
99e9e001 4132 headers = self.generate_api_headers(
4133 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4134 response = self._extract_response(
86e5f3ed 4135 item_id=f'{item_id} page {page_num}',
fe93e2c4 4136 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4137 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4138
4139 if not response:
8bdd16b4 4140 break
ac56cf38 4141 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4142 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4143 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4144
69184e41 4145 known_continuation_renderers = {
4146 'playlistVideoListContinuation': self._playlist_entries,
4147 'gridContinuation': self._grid_entries,
4148 'itemSectionContinuation': self._post_thread_continuation_entries,
4149 'sectionListContinuation': extract_entries, # for feeds
4150 }
8bdd16b4 4151 continuation_contents = try_get(
69184e41 4152 response, lambda x: x['continuationContents'], dict) or {}
4153 continuation_renderer = None
4154 for key, value in continuation_contents.items():
4155 if key not in known_continuation_renderers:
3462ffa8 4156 continue
69184e41 4157 continuation_renderer = value
4158 continuation_list = [None]
86e5f3ed 4159 yield from known_continuation_renderers[key](continuation_renderer)
69184e41 4160 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4161 break
4162 if continuation_renderer:
4163 continue
c5e8d7af 4164
a1b535bd 4165 known_renderers = {
e4b98809 4166 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4167 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4168 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4169 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4170 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4171 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4172 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 4173 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 4174 }
cce889b9 4175 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 4176 continuation_items = try_get(
cce889b9 4177 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 4178 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4179 video_items_renderer = None
4180 for key, value in continuation_item.items():
4181 if key not in known_renderers:
8bdd16b4 4182 continue
a1b535bd 4183 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 4184 continuation_list = [None]
86e5f3ed 4185 yield from known_renderers[key][0](video_items_renderer)
9ba5705a 4186 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 4187 break
4188 if video_items_renderer:
4189 continue
8bdd16b4 4190 break
9558dcec 4191
8bdd16b4 4192 @staticmethod
7c219ea6 4193 def _extract_selected_tab(tabs, fatal=True):
8bdd16b4 4194 for tab in tabs:
cd684175 4195 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4196 if renderer.get('selected') is True:
4197 return renderer
2b3c2546 4198 else:
7c219ea6 4199 if fatal:
4200 raise ExtractorError('Unable to find selected tab')
b82f815f 4201
def _extract_uploader(self, data):
    """Return the uploader fields found in the playlist's secondary sidebar.

    The result holds whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` are not None; it is empty when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # "by X and N others" is reduced to X; any other text is used as is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 4217
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, hashtag).

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``, and is complemented with sidebar and
    header information. The entries are those of the selected tab.
    """
    channel_name = channel_url = channel_id = None

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
    else:
        title = description = playlist_id = None
        tags = []

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Suffix the title with the selected tab's own labels
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, ytcfg, account_syncid, visitor_data),
        **metadata)
73c4ac2c 4309
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    After each page the next one is requested from the ``next`` endpoint,
    anchored on the last video seen; videos up to and including that one are
    skipped on the following page. Extraction ends when a page has no videos,
    has nothing new, or the response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer, in which
        # case there is no watch endpoint and the defaults below are used.
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Without a playlist in the response there is nothing left to page through
            return
cd7c66cf 4340
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    When the playlist links to a page other than ``url`` and is not a known
    unviewable playlist, extraction is delegated to that page. Otherwise the
    inline playlist is extracted directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4363
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # The first selected entry with a label decides
            badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4395
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty ``info_renderer`` among the playlist sidebar items.

    Returns None when the sidebar has no such renderer of ``expected_type``.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4404
ac56cf38 4405 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
358de58c 4406 """
4407 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4408 """
5d342002 4409 browse_id = params = None
47193e02 4410 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4411 if not renderer:
4412 return
4413 menu_renderer = try_get(
4414 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4415 for menu_item in menu_renderer:
4416 if not isinstance(menu_item, dict):
358de58c 4417 continue
47193e02 4418 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4419 text = try_get(
4420 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4421 if not text or text.lower() != 'show unavailable videos':
4422 continue
4423 browse_endpoint = try_get(
4424 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4425 browse_id = browse_endpoint.get('browseId')
4426 params = browse_endpoint.get('params')
4427 break
5d342002 4428
11f9be09 4429 headers = self.generate_api_headers(
99e9e001 4430 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
ac56cf38 4431 visitor_data=self._extract_visitor_data(data, ytcfg))
47193e02 4432 query = {
4433 'params': params or 'wgYCCAA=',
4434 'browseId': browse_id or 'VL%s' % item_id
4435 }
4436 return self._extract_response(
4437 item_id=item_id, headers=headers, query=query,
fe93e2c4 4438 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
47193e02 4439 note='Downloading API JSON with unavailable videos')
358de58c 4440
@functools.cached_property
def skip_webpage(self):
    """Whether ``webpage`` is listed in the ``skip`` extractor argument of the tab extractor."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4444
def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and extract its initial data, retrying when needed.

    An attempt is repeated, up to the ``extractor_retries`` param, after a
    network error other than HTTP 403/429 or when the initial data lacks all
    of the expected top-level keys. With ``fatal`` unset, failures are
    reported as warnings instead of being raised.

    Returns a ``(webpage, data)`` tuple holding the last values obtained.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    for count in itertools.count():
        if count > retries:
            break
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403 and 429
            is_retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if is_retriable:
                last_error = error_to_compat_str(e.cause or e.msg)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and data extraction succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if count >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
4490
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Flag a missing ytcfg while authenticated.

    Meant to be called when ytcfg (and data) could not be extracted from the
    initial webpage. Nothing happens if `ytcfg` is truthy or the session is
    not authenticated. Otherwise an ExtractorError is raised when `fatal` is
    set and 'authcheck' is not among the ``skip`` extractor arguments; in all
    remaining cases a one-time warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_checks and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4502
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the tab data for `url`, preferring the webpage over the API.

    Unless the webpage is skipped, it is downloaded first (`webpage_fatal`
    controls whether that step may raise) and `ytcfg` is taken from it when
    the caller did not supply one. If no data results, the tab endpoint API
    is queried instead.

    @returns  Tuple (data, ytcfg)
    """
    if self.skip_webpage:
        data = None
    else:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that the URL did not explicitly ask for
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        landed_on_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if landed_on_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # Nothing usable from the webpage: fall back to the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4522
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the response of the endpoint it maps to.

    The URL is first sent to 'navigation/resolve_url'. If the answer holds a
    'browseEndpoint' or 'watchEndpoint' dict (checked in that order), its
    contents are used as the query for the 'browse' or 'next' endpoint
    respectively and that response is returned.

    If neither is found, ExtractorError is raised when `fatal` is set;
    otherwise a warning is reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_query = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_query:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_query, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
4540
# Value used for the search request's 'params' field when _search_results()
# is called without an explicit `params`; a falsy value means the field is omitted
_SEARCH_PARAMS = None
4542
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for `query`, following continuations page by page.

    @param params  Value for the request's 'params' field; falls back to
                   self._SEARCH_PARAMS when not given, and is omitted if falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    # Candidate locations of the result list within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple(set(path[0] for path in content_keys))
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries receives alongside each page's contents;
    # its first item is merged into the next request and ends paging once falsy
    continuation_list = [None]
    search = None
    page_num = 1
    while True:
        payload.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break
        page_num += 1
4575
4576
4577class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4578 IE_DESC = 'YouTube Tabs'
4579 _VALID_URL = r'''(?x:
4580 https?://
4581 (?:\w+\.)?
4582 (?:
4583 youtube(?:kids)?\.com|
4584 %(invidious)s
4585 )/
4586 (?:
4587 (?P<channel_type>channel|c|user|browse)/|
4588 (?P<not_channel>
4589 feed/|hashtag/|
4590 (?:playlist|watch)\?.*?\blist=
4591 )|
4592 (?!(?:%(reserved_names)s)\b) # Direct URLs
4593 )
4594 (?P<id>[^/?\#&]+)
4595 )''' % {
4596 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4597 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4598 }
4599 IE_NAME = 'youtube:tab'
4600
4601 _TESTS = [{
4602 'note': 'playlists, multipage',
4603 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4604 'playlist_mincount': 94,
4605 'info_dict': {
4606 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4607 'title': 'Igor Kleiner - Playlists',
a6213a49 4608 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4609 'uploader': 'Igor Kleiner',
a6213a49 4610 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4611 'channel': 'Igor Kleiner',
4612 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4613 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4614 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4615 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4616 'channel_follower_count': int
a6213a49 4617 },
4618 }, {
4619 'note': 'playlists, multipage, different order',
4620 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4621 'playlist_mincount': 94,
4622 'info_dict': {
4623 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4624 'title': 'Igor Kleiner - Playlists',
a6213a49 4625 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4626 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4627 'uploader': 'Igor Kleiner',
4628 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4629 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4630 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4631 'channel': 'Igor Kleiner',
4632 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4633 'channel_follower_count': int
a6213a49 4634 },
4635 }, {
4636 'note': 'playlists, series',
4637 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4638 'playlist_mincount': 5,
4639 'info_dict': {
4640 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4641 'title': '3Blue1Brown - Playlists',
4642 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4643 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4644 'uploader': '3Blue1Brown',
976ae3ea 4645 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4646 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4647 'channel': '3Blue1Brown',
4648 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4649 'tags': ['Mathematics'],
6c73052c 4650 'channel_follower_count': int
a6213a49 4651 },
4652 }, {
4653 'note': 'playlists, singlepage',
4654 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4655 'playlist_mincount': 4,
4656 'info_dict': {
4657 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4658 'title': 'ThirstForScience - Playlists',
4659 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4660 'uploader': 'ThirstForScience',
4661 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4662 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4663 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4664 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4665 'tags': 'count:13',
4666 'channel': 'ThirstForScience',
6c73052c 4667 'channel_follower_count': int
a6213a49 4668 }
4669 }, {
4670 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4671 'only_matching': True,
4672 }, {
4673 'note': 'basic, single video playlist',
4674 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4675 'info_dict': {
4676 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4677 'uploader': 'Sergey M.',
4678 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4679 'title': 'youtube-dl public playlist',
976ae3ea 4680 'description': '',
4681 'tags': [],
4682 'view_count': int,
4683 'modified_date': '20201130',
4684 'channel': 'Sergey M.',
4685 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4686 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4687 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4688 },
4689 'playlist_count': 1,
4690 }, {
4691 'note': 'empty playlist',
4692 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4693 'info_dict': {
4694 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4695 'uploader': 'Sergey M.',
4696 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4697 'title': 'youtube-dl empty playlist',
976ae3ea 4698 'tags': [],
4699 'channel': 'Sergey M.',
4700 'description': '',
4701 'modified_date': '20160902',
4702 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4703 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4704 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4705 },
4706 'playlist_count': 0,
4707 }, {
4708 'note': 'Home tab',
4709 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4710 'info_dict': {
4711 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4712 'title': 'lex will - Home',
4713 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4714 'uploader': 'lex will',
4715 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4716 'channel': 'lex will',
4717 'tags': ['bible', 'history', 'prophesy'],
4718 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4719 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4720 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4721 'channel_follower_count': int
a6213a49 4722 },
4723 'playlist_mincount': 2,
4724 }, {
4725 'note': 'Videos tab',
4726 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4727 'info_dict': {
4728 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4729 'title': 'lex will - Videos',
4730 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4731 'uploader': 'lex will',
4732 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4733 'tags': ['bible', 'history', 'prophesy'],
4734 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4735 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4736 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4737 'channel': 'lex will',
6c73052c 4738 'channel_follower_count': int
a6213a49 4739 },
4740 'playlist_mincount': 975,
4741 }, {
4742 'note': 'Videos tab, sorted by popular',
4743 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4744 'info_dict': {
4745 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4746 'title': 'lex will - Videos',
4747 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4748 'uploader': 'lex will',
4749 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4750 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4751 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4752 'channel': 'lex will',
4753 'tags': ['bible', 'history', 'prophesy'],
4754 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4755 'channel_follower_count': int
a6213a49 4756 },
4757 'playlist_mincount': 199,
4758 }, {
4759 'note': 'Playlists tab',
4760 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4761 'info_dict': {
4762 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4763 'title': 'lex will - Playlists',
4764 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4765 'uploader': 'lex will',
4766 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4767 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4768 'channel': 'lex will',
4769 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4770 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4771 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4772 'channel_follower_count': int
a6213a49 4773 },
4774 'playlist_mincount': 17,
4775 }, {
4776 'note': 'Community tab',
4777 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4778 'info_dict': {
4779 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4780 'title': 'lex will - Community',
4781 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4782 'uploader': 'lex will',
4783 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4784 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4785 'channel': 'lex will',
4786 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4787 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4788 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4789 'channel_follower_count': int
a6213a49 4790 },
4791 'playlist_mincount': 18,
4792 }, {
4793 'note': 'Channels tab',
4794 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4795 'info_dict': {
4796 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4797 'title': 'lex will - Channels',
4798 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4799 'uploader': 'lex will',
4800 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4801 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4802 'channel': 'lex will',
4803 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4804 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4805 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4806 'channel_follower_count': int
a6213a49 4807 },
4808 'playlist_mincount': 12,
4809 }, {
4810 'note': 'Search tab',
4811 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4812 'playlist_mincount': 40,
4813 'info_dict': {
4814 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4815 'title': '3Blue1Brown - Search - linear algebra',
4816 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4817 'uploader': '3Blue1Brown',
4818 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4819 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4820 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4821 'tags': ['Mathematics'],
4822 'channel': '3Blue1Brown',
4823 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4824 'channel_follower_count': int
a6213a49 4825 },
4826 }, {
4827 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4828 'only_matching': True,
4829 }, {
4830 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4831 'only_matching': True,
4832 }, {
4833 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4834 'only_matching': True,
4835 }, {
4836 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4837 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4838 'info_dict': {
4839 'title': '29C3: Not my department',
4840 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4841 'uploader': 'Christiaan008',
4842 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4843 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4844 'tags': [],
4845 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4846 'view_count': int,
4847 'modified_date': '20150605',
4848 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4849 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4850 'channel': 'Christiaan008',
a6213a49 4851 },
4852 'playlist_count': 96,
4853 }, {
4854 'note': 'Large playlist',
4855 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4856 'info_dict': {
4857 'title': 'Uploads from Cauchemar',
4858 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4859 'uploader': 'Cauchemar',
4860 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4861 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4862 'tags': [],
4863 'modified_date': r're:\d{8}',
4864 'channel': 'Cauchemar',
4865 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4866 'view_count': int,
4867 'description': '',
4868 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4869 },
4870 'playlist_mincount': 1123,
976ae3ea 4871 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4872 }, {
4873 'note': 'even larger playlist, 8832 videos',
4874 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4875 'only_matching': True,
4876 }, {
4877 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4878 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4879 'info_dict': {
4880 'title': 'Uploads from Interstellar Movie',
4881 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4882 'uploader': 'Interstellar Movie',
4883 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4884 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4885 'tags': [],
4886 'view_count': int,
4887 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4888 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4889 'channel': 'Interstellar Movie',
4890 'description': '',
4891 'modified_date': r're:\d{8}',
a6213a49 4892 },
4893 'playlist_mincount': 21,
4894 }, {
4895 'note': 'Playlist with "show unavailable videos" button',
4896 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4897 'info_dict': {
4898 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4899 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4900 'uploader': 'Phim Siêu Nhân Nhật Bản',
4901 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4902 'view_count': int,
4903 'channel': 'Phim Siêu Nhân Nhật Bản',
4904 'tags': [],
4905 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4906 'description': '',
4907 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4908 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4909 'modified_date': r're:\d{8}',
a6213a49 4910 },
4911 'playlist_mincount': 200,
976ae3ea 4912 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4913 }, {
4914 'note': 'Playlist with unavailable videos in page 7',
4915 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4916 'info_dict': {
4917 'title': 'Uploads from BlankTV',
4918 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4919 'uploader': 'BlankTV',
4920 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 4921 'channel': 'BlankTV',
4922 'channel_url': 'https://www.youtube.com/c/blanktv',
4923 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4924 'view_count': int,
4925 'tags': [],
4926 'uploader_url': 'https://www.youtube.com/c/blanktv',
4927 'modified_date': r're:\d{8}',
4928 'description': '',
a6213a49 4929 },
4930 'playlist_mincount': 1000,
976ae3ea 4931 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4932 }, {
4933 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4934 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4935 'info_dict': {
4936 'title': 'Data Analysis with Dr Mike Pound',
4937 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4938 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4939 'uploader': 'Computerphile',
4940 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 4941 'uploader_url': 'https://www.youtube.com/user/Computerphile',
4942 'tags': [],
4943 'view_count': int,
4944 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4945 'channel_url': 'https://www.youtube.com/user/Computerphile',
4946 'channel': 'Computerphile',
a6213a49 4947 },
4948 'playlist_mincount': 11,
4949 }, {
4950 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4951 'only_matching': True,
4952 }, {
4953 'note': 'Playlist URL that does not actually serve a playlist',
4954 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4955 'info_dict': {
4956 'id': 'FqZTN594JQw',
4957 'ext': 'webm',
4958 'title': "Smiley's People 01 detective, Adventure Series, Action",
4959 'uploader': 'STREEM',
4960 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4961 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4962 'upload_date': '20150526',
4963 'license': 'Standard YouTube License',
4964 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4965 'categories': ['People & Blogs'],
4966 'tags': list,
4967 'view_count': int,
4968 'like_count': int,
a6213a49 4969 },
4970 'params': {
4971 'skip_download': True,
4972 },
4973 'skip': 'This video is not available.',
4974 'add_ie': [YoutubeIE.ie_key()],
4975 }, {
4976 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4977 'only_matching': True,
4978 }, {
4979 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4980 'only_matching': True,
4981 }, {
4982 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4983 'info_dict': {
6c73052c 4984 'id': 'GgL890LIznQ', # This will keep changing
a6213a49 4985 'ext': 'mp4',
976ae3ea 4986 'title': str,
a6213a49 4987 'uploader': 'Sky News',
4988 'uploader_id': 'skynews',
4989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4990 'upload_date': r're:\d{8}',
976ae3ea 4991 'description': str,
a6213a49 4992 'categories': ['News & Politics'],
4993 'tags': list,
4994 'like_count': int,
6c73052c 4995 'release_timestamp': 1642502819,
976ae3ea 4996 'channel': 'Sky News',
4997 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4998 'age_limit': 0,
4999 'view_count': int,
6c73052c 5000 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5001 'playable_in_embed': True,
6c73052c 5002 'release_date': '20220118',
976ae3ea 5003 'availability': 'public',
5004 'live_status': 'is_live',
5005 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5006 'channel_follower_count': int
a6213a49 5007 },
5008 'params': {
5009 'skip_download': True,
5010 },
976ae3ea 5011 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5012 }, {
5013 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5014 'info_dict': {
5015 'id': 'a48o2S1cPoo',
5016 'ext': 'mp4',
5017 'title': 'The Young Turks - Live Main Show',
5018 'uploader': 'The Young Turks',
5019 'uploader_id': 'TheYoungTurks',
5020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5021 'upload_date': '20150715',
5022 'license': 'Standard YouTube License',
5023 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5024 'categories': ['News & Politics'],
5025 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5026 'like_count': int,
a6213a49 5027 },
5028 'params': {
5029 'skip_download': True,
5030 },
5031 'only_matching': True,
5032 }, {
5033 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5034 'only_matching': True,
5035 }, {
5036 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5037 'only_matching': True,
5038 }, {
5039 'note': 'A channel that is not live. Should raise error',
5040 'url': 'https://www.youtube.com/user/numberphile/live',
5041 'only_matching': True,
5042 }, {
5043 'url': 'https://www.youtube.com/feed/trending',
5044 'only_matching': True,
5045 }, {
5046 'url': 'https://www.youtube.com/feed/library',
5047 'only_matching': True,
5048 }, {
5049 'url': 'https://www.youtube.com/feed/history',
5050 'only_matching': True,
5051 }, {
5052 'url': 'https://www.youtube.com/feed/subscriptions',
5053 'only_matching': True,
5054 }, {
5055 'url': 'https://www.youtube.com/feed/watch_later',
5056 'only_matching': True,
5057 }, {
5058 'note': 'Recommended - redirects to home page.',
5059 'url': 'https://www.youtube.com/feed/recommended',
5060 'only_matching': True,
5061 }, {
5062 'note': 'inline playlist with not always working continuations',
5063 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5064 'only_matching': True,
5065 }, {
5066 'url': 'https://www.youtube.com/course',
5067 'only_matching': True,
5068 }, {
5069 'url': 'https://www.youtube.com/zsecurity',
5070 'only_matching': True,
5071 }, {
5072 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5073 'only_matching': True,
5074 }, {
5075 'url': 'https://www.youtube.com/TheYoungTurks/live',
5076 'only_matching': True,
5077 }, {
5078 'url': 'https://www.youtube.com/hashtag/cctv9',
5079 'info_dict': {
5080 'id': 'cctv9',
5081 'title': '#cctv9',
976ae3ea 5082 'tags': [],
a6213a49 5083 },
5084 'playlist_mincount': 350,
5085 }, {
5086 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5087 'only_matching': True,
5088 }, {
5089 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5090 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5091 'only_matching': True
5092 }, {
5093 'note': '/browse/ should redirect to /channel/',
5094 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5095 'only_matching': True
5096 }, {
5097 'note': 'VLPL, should redirect to playlist?list=PL...',
5098 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5099 'info_dict': {
5100 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5101 'uploader': 'NoCopyrightSounds',
5102 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5103 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5104 'title': 'NCS Releases',
976ae3ea 5105 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5106 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5107 'modified_date': r're:\d{8}',
5108 'view_count': int,
5109 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5110 'tags': [],
5111 'channel': 'NoCopyrightSounds',
a6213a49 5112 },
5113 'playlist_mincount': 166,
976ae3ea 5114 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5115 }, {
5116 'note': 'Topic, should redirect to playlist?list=UU...',
5117 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5118 'info_dict': {
5119 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5120 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5121 'title': 'Uploads from Royalty Free Music - Topic',
5122 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5123 'tags': [],
5124 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5125 'channel': 'Royalty Free Music - Topic',
5126 'view_count': int,
5127 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5128 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5129 'modified_date': r're:\d{8}',
5130 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5131 'description': '',
a6213a49 5132 },
5133 'expected_warnings': [
a6213a49 5134 'The URL does not have a videos tab',
976ae3ea 5135 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5136 ],
5137 'playlist_mincount': 101,
5138 }, {
5139 'note': 'Topic without a UU playlist',
5140 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5141 'info_dict': {
5142 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5143 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5144 'tags': [],
a6213a49 5145 },
5146 'expected_warnings': [
976ae3ea 5147 'the playlist redirect gave error',
a6213a49 5148 ],
5149 'playlist_mincount': 9,
5150 }, {
5151 'note': 'Youtube music Album',
5152 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5153 'info_dict': {
5154 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5155 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5156 'tags': [],
5157 'view_count': int,
5158 'description': '',
5159 'availability': 'unlisted',
5160 'modified_date': r're:\d{8}',
a6213a49 5161 },
5162 'playlist_count': 50,
5163 }, {
5164 'note': 'unlisted single video playlist',
5165 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5166 'info_dict': {
5167 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5168 'uploader': 'colethedj',
5169 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5170 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5171 'availability': 'unlisted',
5172 'tags': [],
5173 'modified_date': '20211208',
5174 'channel': 'colethedj',
5175 'view_count': int,
5176 'description': '',
5177 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5178 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5179 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5180 },
5181 'playlist_count': 1,
5182 }, {
5183 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5184 'url': 'https://www.youtube.com/feed/recommended',
5185 'info_dict': {
5186 'id': 'recommended',
5187 'title': 'recommended',
6c73052c 5188 'tags': [],
a6213a49 5189 },
5190 'playlist_mincount': 50,
5191 'params': {
5192 'skip_download': True,
5193 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5194 },
5195 }, {
5196 'note': 'API Fallback: /videos tab, sorted by oldest first',
5197 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5198 'info_dict': {
5199 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5200 'title': 'Cody\'sLab - Videos',
5201 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5202 'uploader': 'Cody\'sLab',
5203 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5204 'channel': 'Cody\'sLab',
5205 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5206 'tags': [],
5207 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5208 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5209 'channel_follower_count': int
a6213a49 5210 },
5211 'playlist_mincount': 650,
5212 'params': {
5213 'skip_download': True,
5214 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5215 },
5216 }, {
5217 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5218 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5219 'info_dict': {
5220 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5221 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5222 'title': 'Uploads from Royalty Free Music - Topic',
5223 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5224 'modified_date': r're:\d{8}',
5225 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5226 'description': '',
5227 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5228 'tags': [],
5229 'channel': 'Royalty Free Music - Topic',
5230 'view_count': int,
5231 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5232 },
5233 'expected_warnings': [
976ae3ea 5234 'does not have a videos tab',
5235 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5236 ],
5237 'playlist_mincount': 101,
5238 'params': {
5239 'skip_download': True,
5240 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5241 },
7c219ea6 5242 }, {
5243 'note': 'non-standard redirect to regional channel',
5244 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5245 'only_matching': True
61d3665d 5246 }, {
5247 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5248 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5249 'info_dict': {
5250 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5251 'modified_date': '20220407',
5252 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5253 'tags': [],
5254 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5255 'uploader': 'pukkandan',
5256 'availability': 'unlisted',
5257 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5258 'channel': 'pukkandan',
5259 'description': 'Test for collaborative playlist',
5260 'title': 'yt-dlp test - collaborative playlist',
5261 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5262 },
5263 'playlist_mincount': 2
a6213a49 5264 }]
5265
@classmethod
def suitable(cls, url):
    """Return False for any URL YoutubeIE accepts; otherwise defer to the parent's check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5269
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional "/word" path segment, only attempted when the
#          "not_channel" group did not take part in the match
#   post - everything that remains
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5271
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist page, or hand the URL over to another extractor.

    The URL is first normalised to the www.youtube.com host and split with
    _URL_RE. Depending on the URL and the downloaded data this either
    returns a url_result (album -> playlist, regional redirect, single
    video) or the result of _extract_from_tabs / _extract_from_playlist.

    smuggled_data: mapping supplied by the decorator; only the
        'is_music_url' key is read here.
    Raises ExtractorError when the page cannot be recognised, when a
    requested tab does not exist, or when /live is requested on a channel
    that did not redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_channel_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def get_mobj(url):
        # Groups that did not participate in the match are reported as ''
        groups = self._URL_RE.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre = mobj['pre']
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        if item_id.startswith('VL'):
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif item_id.startswith('MP'):
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_channel_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_channel_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_channel_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user asked for this tab explicitly, so do not substitute another one
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id.startswith('UC'):
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 5401
c5e8d7af 5402
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and youtube/invidious URLs carrying a "list="
    # parameter, then forwards them to YoutubeTabIE as a /playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a non-empty "v" parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5512
5513
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist ID ("list=")
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (v + list) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        # Insertion order of this dict is kept as v, list, feature
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5561
5562
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=<id> pages; forwarded to the channel's /live tab
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the channel named in the query."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5576
5577
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for a user's /videos page
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<id>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5592
b05654f0 5593
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (and its spelling variants); forwards to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytfav', ':ytfavorites')
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist?list=LL URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5611
5612
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ":ytnotif" keyword: pages through the notification menu endpoint and
    # yields one url-type entry per notification that can be resolved.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytnotif', ':ytnotifications')
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one response page.

        continuation_list is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        'continuationItemRenderer' found among the items, if any.
        """
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        continuation_list[0] = None
        for item in items or []:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a url-type entry from a notification renderer.

        Returns None when the notification has no video ID and no
        channel ID / post ID pair to build a community-post URL from.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only looked at when approximate dates were requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive notification pages until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5702
5703
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" search key; all behaviour comes from the two base classes
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
b05654f0 5717
a61fd4cf 5718
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" search key; same as YoutubeSearchIE but with date-sorted params
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 5732
c9ae7b95 5733
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # youtube.com/results (or /search) URLs; the "sp" parameter, when present,
    # is passed through to _search_results
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by search_query/q and return it as a playlist titled after the query."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
3462ffa8 5772
5773
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # music.youtube.com/search URLs. A section can be chosen either with an
    # explicit "sp" parameter or with a "#<section name>" fragment.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name -> value of the "sp" search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search YouTube Music and return a playlist titled '<query>' or '<query> - <section>'."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Known "sp" values are shown by section name; unknown ones are shown verbatim
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            fragment = url.split('#')[1] if '#' in url else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5825
5826
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Reuse the login check defined on YoutubeBaseInfoExtractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        # Derived from the feed name, e.g. 'youtube:feeds' for this base class
        return 'youtube:{}'.format(self._FEED_NAME)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = 'https://www.youtube.com/feed/{}'.format(self._FEED_NAME)
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5845
5846
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword; forwards to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{'url': ':ytwatchlater', 'only_matching': True}]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist?list=WL URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5859
5860
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Bare youtube.com home URLs and the ":ytrec" keyword; overrides the
    # base class's _LOGIN_REQUIRED with False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': test_url, 'only_matching': True}
        for test_url in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]
1ed5b5c9 5876
1ed5b5c9 5877
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" keyword (and variants) for the 'subscriptions' feed
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytsubs', ':ytsubscriptions')
    ]
1ed5b5c9 5889
1ed5b5c9 5890
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" keyword for the 'history' feed
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]
1ed5b5c9
JMF
5899
5900
class YoutubeStoriesIE(InfoExtractor):
    # "ytstories:UC<channel id tail>" shortcut; the part after "UC" is
    # prefixed with "RLTD" to form the playlist ID handed to YoutubeTabIE
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the RLTD<id> playlist URL (playnext=1)."""
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key string, as every other delegating extractor here does
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5915
5916
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or after "watch?" itself), which is what is
    # left when an unquoted "&" cuts the URL short on the command line.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command names this program (yt-dlp) so it can be copied as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
5964
5965
class YoutubeClipIE(InfoExtractor):
    # /clip/ URLs: warns that clips are unsupported and passes the URL on
    # to the 'Generic' extractor
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit a warning, then delegate the unchanged URL to the 'Generic' extractor."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')
5974
5975
class YoutubeTruncatedIDIE(InfoExtractor):
    # watch URLs whose "v" value is only 1-10 characters long and ends the URL
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)