]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor] Use classmethod/property where possible
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
adbc4ec4 5import functools
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
720c3099 9import math
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
46383212 13import sys
f8271158 14import threading
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
f8271158 31 NO_DEFAULT,
32 ExtractorError,
720c3099 33 bug_reports_message,
82d02080 34 classproperty,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
ff91cf74 41 get_first,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
48416bc4 45 js_to_json,
94278f72 46 mimetype2ext,
9c0d7f49 47 network_exceptions,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
f0d785d3 65 unified_timestamp,
cf7e015f 66 unsmuggle_url,
8bdd16b4 67 update_url_query,
21c340b8 68 url_or_none,
fe93e2c4 69 urljoin,
7c365c21 70 variadic,
c5e8d7af
PH
71)
72
# any clients starting with _ cannot be explicitly requested by the user
# Per-client innertube configuration, keyed by client name.
# Entries may omit 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER';
# build_innertube_clients() fills those in with defaults.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}
231
232
e7870111
D
233def _split_innertube_client(client_name):
234 variant, *base = client_name.rsplit('.', 1)
235 if base:
236 return variant, base[0], variant
237 base, *variant = client_name.split('_', 1)
238 return client_name, base, variant[0] if variant else None
239
240
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Every client gets default API key, host, 'REQUIRE_JS_PLAYER' and 'hl' values
    where missing, plus a 'priority' computed from its base client.
    Clients without a variant additionally get a '<client>_embedscreen' copy.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    base_priority = qualities(base_clients[::-1])

    # Iterate over a snapshot since '<client>_embedscreen' entries are added inside the loop
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        config.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        config.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        config.setdefault('REQUIRE_JS_PLAYER', True)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        config['priority'] = 10 * base_priority(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Base client: derive an embed-screen copy from the completed config
            embedscreen = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = embedscreen
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3


build_innertube_clients()
270
271
de7f3446 272class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 273 """Provide base functions for Youtube extractors"""
e00eb564 274
3462ffa8 275 _RESERVED_NAMES = (
3cd786db 276 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 277 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
3619f78d 278 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 279 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 280
3619f78d 281 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
282
52efa4b3 283 # _NETRC_MACHINE = 'youtube'
3619f78d 284
b2e8bc1b
JMF
285 # If True it will raise an error if no login info is provided
286 _LOGIN_REQUIRED = False
287
d9190e44
RH
288 _INVIDIOUS_SITES = (
289 # invidious-redirect websites
290 r'(?:www\.)?redirect\.invidious\.io',
291 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 292 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
293 r'(?:www\.)?invidious\.pussthecat\.org',
294 r'(?:www\.)?invidious\.zee\.li',
295 r'(?:www\.)?invidious\.ethibox\.fr',
296 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
297 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
298 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
299 # youtube-dl invidious instances list
300 r'(?:(?:www|no)\.)?invidiou\.sh',
301 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
302 r'(?:www\.)?invidious\.kabi\.tk',
303 r'(?:www\.)?invidious\.mastodon\.host',
304 r'(?:www\.)?invidious\.zapashcanon\.fr',
305 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
306 r'(?:www\.)?invidious\.tinfoil-hat\.net',
307 r'(?:www\.)?invidious\.himiko\.cloud',
308 r'(?:www\.)?invidious\.reallyancient\.tech',
309 r'(?:www\.)?invidious\.tube',
310 r'(?:www\.)?invidiou\.site',
311 r'(?:www\.)?invidious\.site',
312 r'(?:www\.)?invidious\.xyz',
313 r'(?:www\.)?invidious\.nixnet\.xyz',
314 r'(?:www\.)?invidious\.048596\.xyz',
315 r'(?:www\.)?invidious\.drycat\.fr',
316 r'(?:www\.)?inv\.skyn3t\.in',
317 r'(?:www\.)?tube\.poal\.co',
318 r'(?:www\.)?tube\.connect\.cafe',
319 r'(?:www\.)?vid\.wxzm\.sx',
320 r'(?:www\.)?vid\.mint\.lgbt',
321 r'(?:www\.)?vid\.puffyan\.us',
322 r'(?:www\.)?yewtu\.be',
323 r'(?:www\.)?yt\.elukerio\.org',
324 r'(?:www\.)?yt\.lelux\.fi',
325 r'(?:www\.)?invidious\.ggc-project\.de',
326 r'(?:www\.)?yt\.maisputain\.ovh',
327 r'(?:www\.)?ytprivate\.com',
328 r'(?:www\.)?invidious\.13ad\.de',
329 r'(?:www\.)?invidious\.toot\.koeln',
330 r'(?:www\.)?invidious\.fdn\.fr',
331 r'(?:www\.)?watch\.nettohikari\.com',
332 r'(?:www\.)?invidious\.namazso\.eu',
333 r'(?:www\.)?invidious\.silkky\.cloud',
334 r'(?:www\.)?invidious\.exonip\.de',
335 r'(?:www\.)?invidious\.riverside\.rocks',
336 r'(?:www\.)?invidious\.blamefran\.net',
337 r'(?:www\.)?invidious\.moomoo\.de',
338 r'(?:www\.)?ytb\.trom\.tf',
339 r'(?:www\.)?yt\.cyberhost\.uk',
340 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
341 r'(?:www\.)?qklhadlycap4cnod\.onion',
342 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
343 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
344 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
345 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
346 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
347 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
348 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
349 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
350 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
351 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
352 )
353
cce889b9 354 def _initialize_consent(self):
355 cookies = self._get_cookies('https://www.youtube.com/')
356 if cookies.get('__Secure-3PSID'):
357 return
358 consent_id = None
359 consent = cookies.get('CONSENT')
360 if consent:
361 if 'YES' in consent.value:
362 return
363 consent_id = self._search_regex(
364 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
365 if not consent_id:
366 consent_id = random.randint(100, 999)
367 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 368
def _initialize_pref(self):
    """
    Rewrite the PREF cookie for .youtube.com with 'hl' set to 'en' and 'tz' set to 'UTC'.

    Other values of an existing PREF cookie are kept when it can be parsed;
    a warning is reported otherwise.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if existing:
        try:
            pref = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref['hl'] = 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
380
b2e8bc1b 381 def _real_initialize(self):
f3aa3c3f 382 self._initialize_pref()
cce889b9 383 self._initialize_consent()
a25bca9f 384 self._check_login_required()
385
386 def _check_login_required(self):
52efa4b3 387 if (self._LOGIN_REQUIRED
388 and self.get_param('cookiefile') is None
389 and self.get_param('cookiesfrombrowser') is None):
390 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 391
a0566bbf 392 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 393 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
394 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 395
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config of `client`, so callers may modify it freely."""
    default_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_config)
109dd3b2 398
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 401
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the default config of `default_client` when the result is falsy."""
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
406
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, falling back to the defaults of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 411
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, falling back to the defaults of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 416
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, falling back to the defaults of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
419
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg`, or of the default config of `default_client`.

    The context's 'client' dict (when present) is updated in place
    to use English and UTC.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
    return context
427
cf87314d 428 _SAPISID = None
429
109dd3b2 430 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 431 time_now = round(time.time())
cf87314d 432 if self._SAPISID is None:
433 yt_cookies = self._get_cookies('https://www.youtube.com')
434 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
435 # See: https://github.com/yt-dlp/yt-dlp/issues/393
436 sapisid_cookie = dict_get(
437 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
438 if sapisid_cookie and sapisid_cookie.value:
439 self._SAPISID = sapisid_cookie.value
440 self.write_debug('Extracted SAPISID cookie')
441 # SAPISID cookie is required if not already present
442 if not yt_cookies.get('SAPISID'):
443 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
444 self._set_cookie(
445 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
446 else:
447 self._SAPISID = False
448 if not self._SAPISID:
449 return None
1974e99f 450 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
451 sapisidhash = hashlib.sha1(
86e5f3ed 452 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 453 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
454
455 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 456 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 457 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 458
109dd3b2 459 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 460 data.update(query)
11f9be09 461 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 462 real_headers.update({'content-type': 'application/json'})
463 if headers:
464 real_headers.update(headers)
545cc85d 465 return self._download_json(
86e5f3ed 466 f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
a5c56234 467 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 468 data=json.dumps(data).encode('utf8'), headers=real_headers,
5dbc77df 469 query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
f4f751af 470
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in `webpage` and return it parsed; returns None if nothing was found."""
    patterns = (
        fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)
0c148415 477
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to an int, else None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
488
489 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the 'ID_TOKEN' from `ytcfg` if present, else search for it in `webpage`; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
499
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]
a1c5d2ca 518
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
ac56cf38 528
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
532
def extract_ytcfg(self, video_id, webpage):
    """Parse the first `ytcfg.set({...})` call found in `webpage`; returns {} when missing or unparsable."""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
540
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicitly passed values take precedence over those extracted from `ytcfg`.
    Headers whose value is None are left out of the result.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 565
a25bca9f 566 def _download_ytcfg(self, client, video_id):
567 url = {
568 'web': 'https://www.youtube.com',
569 'web_music': 'https://music.youtube.com',
570 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
571 }.get(client)
572 if not url:
573 return {}
574 webpage = self._download_webpage(
575 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
576 return self.extract_ytcfg(video_id, webpage) or {}
577
2d6659b9 578 @staticmethod
579 def _build_api_continuation_query(continuation, ctp=None):
580 query = {
581 'continuation': continuation
582 }
583 # TODO: Inconsistency with clickTrackingParams.
584 # Currently we have a fixed ctp contained within context (from ytcfg)
585 # and a ctp in root query for continuation.
586 if ctp:
587 query['clickTracking'] = {'clickTrackingParams': ctp}
588 return query
589
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data; None if absent."""
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 602
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None if it has no token."""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 612
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None.

    The renderer's own continuation data is preferred; otherwise the first usable
    continuationItemRenderer among its 'contents' and 'items' is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []
    ]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(child, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
633
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for each alert in data['alerts'] that has both a type and text.

    Malformed entries (non-dict alert containers or alert renderers) are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the renderer the same way as its container,
            # so unexpected data is skipped instead of raising AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
646
c0ac49bc 647 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 648 errors = []
649 warnings = []
650 for alert_type, alert_message in alerts:
641ad5d8 651 if alert_type.lower() == 'error' and fatal:
109dd3b2 652 errors.append([alert_type, alert_message])
653 else:
654 warnings.append([alert_type, alert_message])
655
656 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 657 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 658 if errors:
659 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
660
661 def _extract_and_report_alerts(self, data, *args, **kwargs):
662 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
663
def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = set()
    for badge in try_get(renderer, lambda x: x['badges'], list) or []:
        badge_label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        if not badge_label:
            continue
        labels.add(badge_label.lower())
    return labels
671
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found under any of the paths in `data` (or in `data` itself).

    A text is either the 'simpleText' of an object or the joined 'text' values of
    its 'runs' (limited to `max_runs` runs when given). Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object rather than a list of them
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 693
def _get_count(self, data, *path_list):
    """Parse a count from the text at the given paths; falls back to the leading digits of the text."""
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
701
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    found = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            found.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return found
725
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text contains no relative time or it cannot be converted.
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    amount, unit = mobj.group('time'), mobj.group('unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
741
742 def _extract_time_text(self, renderer, *path_list):
a25bca9f 743 """@returns (timestamp, time_text)"""
f3aa3c3f 744 text = self._get_text(renderer, *path_list) or ''
745 dt = self.extract_relative_time(text)
746 timestamp = None
747 if isinstance(dt, datetime.datetime):
748 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 749
750 if timestamp is None:
751 timestamp = (
752 unified_timestamp(text) or unified_timestamp(
753 self._search_regex(
17322130 754 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 755 text.lower(), 'time text', default=None)))
f0d785d3 756
f3aa3c3f 757 if text and timestamp is None:
17322130 758 self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
f3aa3c3f 759 return timestamp, text
760
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying up to the 'extractor_retries' param (default 3) times.

    A request is retried on network errors other than HTTP 403/429, on alerts
    containing 'unknown error', and when none of `check_get_keys` is set in the response.
    When not `fatal`, failures are reported as warnings and None is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                is_http_error = isinstance(e.cause, compat_HTTPError)
                if is_http_error:
                    # Surface the error message of a JSON error body as a warning
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        error_body = self._webpage_read_content(
                            e.cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response
832
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with an http(s)://music.youtube.com/ address."""
    # re.match anchors at the start of the string; a match object is always truthy
    return bool(re.match(r'https?://music\.youtube\.com/', url))
836
def _extract_video(self, renderer):
    """Turn a video renderer dict into a `url`-type result handled by YoutubeIE.

    Metadata is gathered from the renderer in a fixed order, then returned
    together with a watch (or shorts) URL for the video.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    time_status_path = ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer')
    length_text = self._get_text(renderer, 'lengthText', (*time_status_path, 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # No usable length text: try to pull the duration out of the
        # title's accessibility label instead
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'),
            default='', expected_type=str)
        duration_text = self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration')
        duration = parse_duration(duration_text)

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time)
    overlay_style = traverse_obj(
        renderer, (*time_status_path, 'style'), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    relative_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_url) or ''

    # Shorts are recognised by the overlay style or by the navigation URL
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    ie_key = YoutubeIE.ie_key()

    # The (approximate) upload date is only filled in when the
    # `approximate_date` extractor argument is set
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
891
0c148415 892
360e1ca5 893class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 894 IE_DESC = 'YouTube'
cb7dfeea 895 _VALID_URL = r"""(?x)^
c5e8d7af 896 (
edb53e2d 897 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 898 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
899 (?:www\.)?deturl\.com/www\.youtube\.com|
900 (?:www\.)?pwnyoutube\.com|
901 (?:www\.)?hooktube\.com|
902 (?:www\.)?yourepeat\.com|
903 tube\.majestyc\.net|
904 %(invidious)s|
905 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
906 (?:.*?\#/)? # handle anchor (#/) redirect urls
907 (?: # the various things that can precede the ID:
b6ce9bb0 908 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 909 |(?: # or the v= param in all its forms
f7000f3a 910 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 911 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 912 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
913 v=
914 )
f4b05232 915 ))
cbaed4bb
S
916 |(?:
917 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
918 vid\.plus| # or vid.plus/xxxx
919 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 920 %(invidious)s
cbaed4bb 921 )/
edb53e2d 922 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 923 )
c5e8d7af 924 )? # all until now is optional -> you can pass the naked ID
201c1459 925 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 926 (?(1).+)? # if we found the ID, everything can follow
9297939e 927 (?:\#|$)""" % {
d9190e44 928 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 929 }
e40c758c 930 _PLAYER_INFO_RE = (
cc2db878 931 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
932 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 933 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 934 )
2c62dc26 935 _formats = {
c2d3cb4c 936 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
937 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
938 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
939 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
940 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
941 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
942 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
943 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 944 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 945 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
946 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
947 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
948 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
949 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
950 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 951 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 952 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
953 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 954
955
956 # 3D videos
c2d3cb4c 957 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
958 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
959 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
960 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 961 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
962 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
963 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 964
96fb5605 965 # Apple HTTP Live Streaming
11f12195 966 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 967 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
968 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
969 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
970 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
971 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 972 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
973 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
974
975 # DASH mp4 video
d23028a8
S
976 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
979 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 981 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
982 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
984 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
985 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
986 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
987 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 988
f6f1fc92 989 # Dash mp4 audio
d23028a8
S
990 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
991 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
992 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
993 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
994 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
995 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
996 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
997
998 # Dash webm
d23028a8
S
999 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1006 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1014 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1015 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1018 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1019 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1021
1022 # Dash webm audio
d23028a8
S
1023 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1024 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1025
0857baad 1026 # Dash webm audio with opus inside
d23028a8
S
1027 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1028 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1029 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1030
ce6b9a2d
PH
1031 # RTMP (unnamed)
1032 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1033
1034 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1035 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1036 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1037 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1038 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1039 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1040 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1041 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1042 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1043 }
29f7c58a 1044 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1045
fd5c4aab
S
1046 _GEO_BYPASS = False
1047
78caa52a 1048 IE_NAME = 'youtube'
2eb88d95
PH
1049 _TESTS = [
1050 {
2d3d2997 1051 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1052 'info_dict': {
1053 'id': 'BaW_jenozKc',
1054 'ext': 'mp4',
3867038a 1055 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1056 'uploader': 'Philipp Hagemeister',
1057 'uploader_id': 'phihag',
ec85ded8 1058 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1059 'channel': 'Philipp Hagemeister',
dd4c4492
S
1060 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1061 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1062 'upload_date': '20121002',
ff9f925b 1063 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1064 'categories': ['Science & Technology'],
3867038a 1065 'tags': ['youtube-dl'],
556dbe7f 1066 'duration': 10,
dbdaaa23 1067 'view_count': int,
3e7c1224 1068 'like_count': int,
ff9f925b 1069 'availability': 'public',
1070 'playable_in_embed': True,
1071 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1072 'live_status': 'not_live',
1073 'age_limit': 0,
7c80519c 1074 'start_time': 1,
297a564b 1075 'end_time': 9,
6c73052c 1076 'channel_follower_count': int
2eb88d95 1077 }
0e853ca4 1078 },
fccd3771 1079 {
4bc3a23e
PH
1080 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1081 'note': 'Embed-only video (#1746)',
1082 'info_dict': {
1083 'id': 'yZIXLfi8CZQ',
1084 'ext': 'mp4',
1085 'upload_date': '20120608',
1086 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1087 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1088 'uploader': 'SET India',
94bfcd23 1089 'uploader_id': 'setindia',
ec85ded8 1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1091 'age_limit': 18,
545cc85d 1092 },
1093 'skip': 'Private video',
fccd3771 1094 },
11b56058 1095 {
8bdd16b4 1096 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1097 'note': 'Use the first video ID in the URL',
1098 'info_dict': {
1099 'id': 'BaW_jenozKc',
1100 'ext': 'mp4',
3867038a 1101 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1102 'uploader': 'Philipp Hagemeister',
1103 'uploader_id': 'phihag',
ec85ded8 1104 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1105 'channel': 'Philipp Hagemeister',
1106 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1107 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1108 'upload_date': '20121002',
976ae3ea 1109 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1110 'categories': ['Science & Technology'],
3867038a 1111 'tags': ['youtube-dl'],
556dbe7f 1112 'duration': 10,
dbdaaa23 1113 'view_count': int,
11b56058 1114 'like_count': int,
976ae3ea 1115 'availability': 'public',
1116 'playable_in_embed': True,
1117 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1118 'live_status': 'not_live',
1119 'age_limit': 0,
6c73052c 1120 'channel_follower_count': int
34a7de29
S
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
11b56058 1125 },
dd27fd17 1126 {
2d3d2997 1127 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1128 'note': '256k DASH audio (format 141) via DASH manifest',
1129 'info_dict': {
1130 'id': 'a9LDPn-MO4I',
1131 'ext': 'm4a',
1132 'upload_date': '20121002',
1133 'uploader_id': '8KVIDEO',
ec85ded8 1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1135 'description': '',
1136 'uploader': '8KVIDEO',
1137 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1138 },
4bc3a23e
PH
1139 'params': {
1140 'youtube_include_dash_manifest': True,
1141 'format': '141',
4919603f 1142 },
de3c7fe0 1143 'skip': 'format 141 not served anymore',
dd27fd17 1144 },
8bdd16b4 1145 # DASH manifest with encrypted signature
1146 {
1147 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1148 'info_dict': {
1149 'id': 'IB3lcPjvWLA',
1150 'ext': 'm4a',
1151 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1152 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1153 'duration': 244,
1154 'uploader': 'AfrojackVEVO',
1155 'uploader_id': 'AfrojackVEVO',
1156 'upload_date': '20131011',
cc2db878 1157 'abr': 129.495,
976ae3ea 1158 'like_count': int,
1159 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1160 'playable_in_embed': True,
1161 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1162 'view_count': int,
1163 'track': 'The Spark',
1164 'live_status': 'not_live',
1165 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1166 'channel': 'Afrojack',
1167 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1168 'tags': 'count:19',
1169 'availability': 'public',
1170 'categories': ['Music'],
1171 'age_limit': 0,
1172 'alt_title': 'The Spark',
6c73052c 1173 'channel_follower_count': int
8bdd16b4 1174 },
1175 'params': {
1176 'youtube_include_dash_manifest': True,
1177 'format': '141/bestaudio[ext=m4a]',
1178 },
1179 },
65c2fde2 1180 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1181 {
65c2fde2 1182 'note': 'Embed allowed age-gate video',
2d3d2997 1183 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1184 'info_dict': {
1185 'id': 'HtVdAasjOgU',
1186 'ext': 'mp4',
1187 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1188 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1189 'duration': 142,
c522adb1
JMF
1190 'uploader': 'The Witcher',
1191 'uploader_id': 'WitcherGame',
ec85ded8 1192 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1193 'upload_date': '20140605',
34952f09 1194 'age_limit': 18,
976ae3ea 1195 'categories': ['Gaming'],
1196 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1197 'availability': 'needs_auth',
1198 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1199 'like_count': int,
1200 'channel': 'The Witcher',
1201 'live_status': 'not_live',
1202 'tags': 'count:17',
1203 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1204 'playable_in_embed': True,
1205 'view_count': int,
6c73052c 1206 'channel_follower_count': int
c522adb1
JMF
1207 },
1208 },
65c2fde2 1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
976ae3ea 1221 'availability': 'needs_auth',
1222 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1223 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1224 'channel': 'FlyingKitty',
1225 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1226 'view_count': int,
1227 'categories': ['Entertainment'],
1228 'live_status': 'not_live',
1229 'tags': ['Flyingkitty', 'godzilla 2'],
1230 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1231 'like_count': int,
1232 'duration': 177,
1233 'playable_in_embed': True,
6c73052c 1234 'channel_follower_count': int
65c2fde2 1235 },
1236 },
1237 {
1238 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1239 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1240 'info_dict': {
1241 'id': 'Tq92D6wQ1mg',
1242 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1243 'ext': 'mp4',
17322130 1244 'upload_date': '20191228',
65c2fde2 1245 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1246 'uploader': 'Projekt Melody',
1247 'description': 'md5:17eccca93a786d51bc67646756894066',
1248 'age_limit': 18,
976ae3ea 1249 'like_count': int,
1250 'availability': 'needs_auth',
1251 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1252 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'view_count': int,
1254 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1255 'channel': 'Projekt Melody',
1256 'live_status': 'not_live',
1257 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1258 'playable_in_embed': True,
1259 'categories': ['Entertainment'],
1260 'duration': 106,
1261 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
6c73052c 1262 'channel_follower_count': int
65c2fde2 1263 },
1264 },
1265 {
1266 'note': 'Non-Agegated non-embeddable video',
1267 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1268 'info_dict': {
1269 'id': 'MeJVWBSsPAY',
1270 'ext': 'mp4',
1271 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1272 'uploader': 'Herr Lurik',
1273 'uploader_id': 'st3in234',
1274 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1275 'upload_date': '20130730',
976ae3ea 1276 'track': 'Such mich find mich',
1277 'age_limit': 0,
1278 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1279 'like_count': int,
1280 'playable_in_embed': False,
1281 'creator': 'OOMPH!',
1282 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1283 'view_count': int,
1284 'alt_title': 'Such mich find mich',
1285 'duration': 210,
1286 'channel': 'Herr Lurik',
1287 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1288 'categories': ['Music'],
1289 'availability': 'public',
1290 'uploader_url': 'http://www.youtube.com/user/st3in234',
1291 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1292 'live_status': 'not_live',
1293 'artist': 'OOMPH!',
6c73052c 1294 'channel_follower_count': int
65c2fde2 1295 },
1296 },
1297 {
1298 'note': 'Non-bypassable age-gated video',
1299 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1300 'only_matching': True,
1301 },
8bdd16b4 1302 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1303 # YouTube Red ad is not captured for creator
1304 {
1305 'url': '__2ABJjxzNo',
1306 'info_dict': {
1307 'id': '__2ABJjxzNo',
1308 'ext': 'mp4',
1309 'duration': 266,
1310 'upload_date': '20100430',
1311 'uploader_id': 'deadmau5',
1312 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1313 'creator': 'deadmau5',
1314 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1315 'uploader': 'deadmau5',
1316 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1317 'alt_title': 'Some Chords',
976ae3ea 1318 'availability': 'public',
1319 'tags': 'count:14',
1320 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1321 'view_count': int,
1322 'live_status': 'not_live',
1323 'channel': 'deadmau5',
1324 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1325 'like_count': int,
1326 'track': 'Some Chords',
1327 'artist': 'deadmau5',
1328 'playable_in_embed': True,
1329 'age_limit': 0,
1330 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1331 'categories': ['Music'],
1332 'album': 'Some Chords',
6c73052c 1333 'channel_follower_count': int
8bdd16b4 1334 },
1335 'expected_warnings': [
1336 'DASH manifest missing',
1337 ]
1338 },
067aa17e 1339 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1340 {
1341 'url': 'lqQg6PlCWgI',
1342 'info_dict': {
1343 'id': 'lqQg6PlCWgI',
1344 'ext': 'mp4',
556dbe7f 1345 'duration': 6085,
90227264 1346 'upload_date': '20150827',
cbe2bd91 1347 'uploader_id': 'olympic',
ec85ded8 1348 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1349 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1350 'uploader': 'Olympics',
cbe2bd91 1351 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1352 'like_count': int,
1353 'release_timestamp': 1343767800,
1354 'playable_in_embed': True,
1355 'categories': ['Sports'],
1356 'release_date': '20120731',
1357 'channel': 'Olympics',
1358 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1359 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1360 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1361 'age_limit': 0,
1362 'availability': 'public',
1363 'live_status': 'was_live',
1364 'view_count': int,
1365 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1366 'channel_follower_count': int
cbe2bd91
PH
1367 },
1368 'params': {
1369 'skip_download': 'requires avconv',
e52a40ab 1370 }
cbe2bd91 1371 },
6271f1ca
PH
1372 # Non-square pixels
1373 {
1374 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1375 'info_dict': {
1376 'id': '_b-2C3KPAM0',
1377 'ext': 'mp4',
1378 'stretched_ratio': 16 / 9.,
556dbe7f 1379 'duration': 85,
6271f1ca
PH
1380 'upload_date': '20110310',
1381 'uploader_id': 'AllenMeow',
ec85ded8 1382 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1383 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1384 'uploader': '孫ᄋᄅ',
6271f1ca 1385 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1386 'playable_in_embed': True,
1387 'channel': '孫ᄋᄅ',
1388 'age_limit': 0,
1389 'tags': 'count:11',
1390 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1391 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1392 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1393 'view_count': int,
1394 'categories': ['People & Blogs'],
1395 'like_count': int,
1396 'live_status': 'not_live',
1397 'availability': 'unlisted',
6c73052c 1398 'channel_follower_count': int
6271f1ca 1399 },
06b491eb
S
1400 },
1401 # url_encoded_fmt_stream_map is empty string
1402 {
1403 'url': 'qEJwOuvDf7I',
1404 'info_dict': {
1405 'id': 'qEJwOuvDf7I',
f57b7835 1406 'ext': 'webm',
06b491eb
S
1407 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1408 'description': '',
1409 'upload_date': '20150404',
1410 'uploader_id': 'spbelect',
1411 'uploader': 'Наблюдатели Петербурга',
1412 },
1413 'params': {
1414 'skip_download': 'requires avconv',
e323cf3f
S
1415 },
1416 'skip': 'This live event has ended.',
06b491eb 1417 },
067aa17e 1418 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1419 {
1420 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1421 'info_dict': {
1422 'id': 'FIl7x6_3R5Y',
eb6793ba 1423 'ext': 'webm',
da77d856
S
1424 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1425 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1426 'duration': 220,
da77d856
S
1427 'upload_date': '20150625',
1428 'uploader_id': 'dorappi2000',
ec85ded8 1429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1430 'uploader': 'dorappi2000',
eb6793ba 1431 'formats': 'mincount:31',
da77d856 1432 },
eb6793ba 1433 'skip': 'not actual anymore',
2ee8f5d8 1434 },
8a1a26ce
YCH
1435 # DASH manifest with segment_list
1436 {
1437 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1438 'md5': '8ce563a1d667b599d21064e982ab9e31',
1439 'info_dict': {
1440 'id': 'CsmdDsKjzN8',
1441 'ext': 'mp4',
17ee98e1 1442 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1443 'uploader': 'Airtek',
1444 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1445 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1446 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1447 },
1448 'params': {
1449 'youtube_include_dash_manifest': True,
1450 'format': '135', # bestvideo
be49068d
S
1451 },
1452 'skip': 'This live event has ended.',
2ee8f5d8 1453 },
cf7e015f
S
1454 {
1455 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1456 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1457 'info_dict': {
545cc85d 1458 'id': 'jvGDaLqkpTg',
1459 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1460 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1461 },
1462 'playlist': [{
1463 'info_dict': {
545cc85d 1464 'id': 'jvGDaLqkpTg',
cf7e015f 1465 'ext': 'mp4',
545cc85d 1466 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1467 'description': 'md5:e03b909557865076822aa169218d6a5d',
1468 'duration': 10643,
1469 'upload_date': '20161111',
1470 'uploader': 'Team PGP',
1471 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1472 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1473 },
1474 }, {
1475 'info_dict': {
545cc85d 1476 'id': '3AKt1R1aDnw',
cf7e015f 1477 'ext': 'mp4',
545cc85d 1478 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1479 'description': 'md5:e03b909557865076822aa169218d6a5d',
1480 'duration': 10991,
1481 'upload_date': '20161111',
1482 'uploader': 'Team PGP',
1483 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1485 },
1486 }, {
1487 'info_dict': {
545cc85d 1488 'id': 'RtAMM00gpVc',
cf7e015f 1489 'ext': 'mp4',
545cc85d 1490 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1491 'description': 'md5:e03b909557865076822aa169218d6a5d',
1492 'duration': 10995,
1493 'upload_date': '20161111',
1494 'uploader': 'Team PGP',
1495 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1496 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1497 },
1498 }, {
1499 'info_dict': {
545cc85d 1500 'id': '6N2fdlP3C5U',
cf7e015f 1501 'ext': 'mp4',
545cc85d 1502 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1503 'description': 'md5:e03b909557865076822aa169218d6a5d',
1504 'duration': 10990,
1505 'upload_date': '20161111',
1506 'uploader': 'Team PGP',
1507 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1508 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1509 },
1510 }],
1511 'params': {
1512 'skip_download': True,
1513 },
65c2fde2 1514 'skip': 'Not multifeed anymore',
cbaed4bb 1515 },
f9f49d87 1516 {
067aa17e 1517 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1518 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1519 'info_dict': {
1520 'id': 'gVfLd0zydlo',
1521 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1522 },
1523 'playlist_count': 2,
be49068d 1524 'skip': 'Not multifeed anymore',
f9f49d87 1525 },
cbaed4bb 1526 {
2d3d2997 1527 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1528 'only_matching': True,
0e49d9a6 1529 },
6d4fc66b 1530 {
2d3d2997 1531 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1532 'only_matching': True,
1533 },
0e49d9a6 1534 {
067aa17e 1535 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1536 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1537 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1538 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1539 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1540 'info_dict': {
1541 'id': 'lsguqyKfVQg',
1542 'ext': 'mp4',
1543 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1544 'alt_title': 'Dark Walk',
0e49d9a6 1545 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1546 'duration': 133,
0e49d9a6
LL
1547 'upload_date': '20151119',
1548 'uploader_id': 'IronSoulElf',
ec85ded8 1549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1550 'uploader': 'IronSoulElf',
11f9be09 1551 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1552 'track': 'Dark Walk',
1553 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1554 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1555 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1556 'categories': ['Film & Animation'],
1557 'view_count': int,
1558 'live_status': 'not_live',
1559 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1560 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1561 'tags': 'count:13',
1562 'availability': 'public',
1563 'channel': 'IronSoulElf',
1564 'playable_in_embed': True,
1565 'like_count': int,
1566 'age_limit': 0,
6c73052c 1567 'channel_follower_count': int
0e49d9a6
LL
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 },
61f92af1 1573 {
067aa17e 1574 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1575 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1576 'only_matching': True,
1577 },
313dfc45
LL
1578 {
1579 # Video with yt:stretch=17:0
1580 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1581 'info_dict': {
1582 'id': 'Q39EVAstoRM',
1583 'ext': 'mp4',
1584 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1585 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1586 'upload_date': '20151107',
1587 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1588 'uploader': 'CH GAMER DROID',
1589 },
1590 'params': {
1591 'skip_download': True,
1592 },
be49068d 1593 'skip': 'This video does not exist.',
313dfc45 1594 },
201c1459 1595 {
1596 # Video with incomplete 'yt:stretch=16:'
1597 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1598 'only_matching': True,
1599 },
7caf9830
S
1600 {
1601 # Video licensed under Creative Commons
1602 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1603 'info_dict': {
1604 'id': 'M4gD1WSo5mA',
1605 'ext': 'mp4',
1606 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1607 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1608 'duration': 721,
17322130 1609 'upload_date': '20150128',
7caf9830 1610 'uploader_id': 'BerkmanCenter',
ec85ded8 1611 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1612 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1613 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1614 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1615 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1616 'like_count': int,
1617 'age_limit': 0,
1618 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1619 'channel': 'The Berkman Klein Center for Internet & Society',
1620 'availability': 'public',
1621 'view_count': int,
1622 'categories': ['Education'],
1623 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1624 'live_status': 'not_live',
1625 'playable_in_embed': True,
6c73052c 1626 'channel_follower_count': int
7caf9830
S
1627 },
1628 'params': {
1629 'skip_download': True,
1630 },
1631 },
fd050249
S
1632 {
1633 # Channel-like uploader_url
1634 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1635 'info_dict': {
1636 'id': 'eQcmzGIKrzg',
1637 'ext': 'mp4',
1638 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1639 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1640 'duration': 4060,
17322130 1641 'upload_date': '20151120',
eb6793ba 1642 'uploader': 'Bernie Sanders',
fd050249 1643 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1644 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1645 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1646 'playable_in_embed': True,
1647 'tags': 'count:12',
1648 'like_count': int,
1649 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1650 'age_limit': 0,
1651 'availability': 'public',
1652 'categories': ['News & Politics'],
1653 'channel': 'Bernie Sanders',
1654 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1655 'view_count': int,
1656 'live_status': 'not_live',
1657 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
6c73052c 1658 'channel_follower_count': int
fd050249
S
1659 },
1660 'params': {
1661 'skip_download': True,
1662 },
1663 },
040ac686
S
1664 {
1665 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1666 'only_matching': True,
7f29cf54
S
1667 },
1668 {
067aa17e 1669 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1670 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1671 'only_matching': True,
6496ccb4
S
1672 },
1673 {
1674 # Rental video preview
1675 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1676 'info_dict': {
1677 'id': 'uGpuVWrhIzE',
1678 'ext': 'mp4',
1679 'title': 'Piku - Trailer',
1680 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1681 'upload_date': '20150811',
1682 'uploader': 'FlixMatrix',
1683 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1684 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1685 'license': 'Standard YouTube License',
1686 },
1687 'params': {
1688 'skip_download': True,
1689 },
eb6793ba 1690 'skip': 'This video is not available.',
022a5d66 1691 },
12afdc2a
S
1692 {
1693 # YouTube Red video with episode data
1694 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1695 'info_dict': {
1696 'id': 'iqKdEhx-dD4',
1697 'ext': 'mp4',
1698 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1699 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1700 'duration': 2085,
12afdc2a
S
1701 'upload_date': '20170118',
1702 'uploader': 'Vsauce',
1703 'uploader_id': 'Vsauce',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1705 'series': 'Mind Field',
1706 'season_number': 1,
1707 'episode_number': 1,
976ae3ea 1708 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1709 'tags': 'count:12',
1710 'view_count': int,
1711 'availability': 'public',
1712 'age_limit': 0,
1713 'channel': 'Vsauce',
1714 'episode': 'Episode 1',
1715 'categories': ['Entertainment'],
1716 'season': 'Season 1',
1717 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1718 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1719 'like_count': int,
1720 'playable_in_embed': True,
1721 'live_status': 'not_live',
6c73052c 1722 'channel_follower_count': int
12afdc2a
S
1723 },
1724 'params': {
1725 'skip_download': True,
1726 },
1727 'expected_warnings': [
1728 'Skipping DASH manifest',
1729 ],
1730 },
c7121fa7
S
1731 {
1732 # The following content has been identified by the YouTube community
1733 # as inappropriate or offensive to some audiences.
1734 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1735 'info_dict': {
1736 'id': '6SJNVb0GnPI',
1737 'ext': 'mp4',
1738 'title': 'Race Differences in Intelligence',
1739 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1740 'duration': 965,
1741 'upload_date': '20140124',
1742 'uploader': 'New Century Foundation',
1743 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1744 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1745 },
1746 'params': {
1747 'skip_download': True,
1748 },
545cc85d 1749 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1750 },
022a5d66
S
1751 {
1752 # itag 212
1753 'url': '1t24XAntNCY',
1754 'only_matching': True,
fd5c4aab
S
1755 },
1756 {
1757 # geo restricted to JP
1758 'url': 'sJL6WA-aGkQ',
1759 'only_matching': True,
1760 },
cd5a74a2
S
1761 {
1762 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1763 'only_matching': True,
1764 },
bc2ca1bb 1765 {
1766 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1767 'only_matching': True,
1768 },
1769 {
1770 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1771 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1772 'only_matching': True,
1773 },
825cd268
RA
1774 {
1775 # DRM protected
1776 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1777 'only_matching': True,
4fe54c12
S
1778 },
1779 {
1780 # Video with unsupported adaptive stream type formats
1781 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1782 'info_dict': {
1783 'id': 'Z4Vy8R84T1U',
1784 'ext': 'mp4',
1785 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1786 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1787 'duration': 433,
1788 'upload_date': '20130923',
1789 'uploader': 'Amelia Putri Harwita',
1790 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1791 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1792 'formats': 'maxcount:10',
1793 },
1794 'params': {
1795 'skip_download': True,
1796 'youtube_include_dash_manifest': False,
1797 },
5429d6a9 1798 'skip': 'not actual anymore',
5caabd3c 1799 },
1800 {
822b9d9c 1801 # Youtube Music Auto-generated description
5caabd3c 1802 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1803 'info_dict': {
1804 'id': 'MgNrAu2pzNs',
1805 'ext': 'mp4',
1806 'title': 'Voyeur Girl',
1807 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1808 'upload_date': '20190312',
5429d6a9
S
1809 'uploader': 'Stephen - Topic',
1810 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1811 'artist': 'Stephen',
1812 'track': 'Voyeur Girl',
1813 'album': 'it\'s too much love to know my dear',
1814 'release_date': '20190313',
1815 'release_year': 2019,
976ae3ea 1816 'alt_title': 'Voyeur Girl',
1817 'view_count': int,
1818 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1819 'playable_in_embed': True,
1820 'like_count': int,
1821 'categories': ['Music'],
1822 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1823 'channel': 'Stephen',
1824 'availability': 'public',
1825 'creator': 'Stephen',
1826 'duration': 169,
1827 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1828 'age_limit': 0,
1829 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1830 'tags': 'count:11',
1831 'live_status': 'not_live',
6c73052c 1832 'channel_follower_count': int
5caabd3c 1833 },
1834 'params': {
1835 'skip_download': True,
1836 },
1837 },
66b48727
RA
1838 {
1839 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1840 'only_matching': True,
1841 },
011e75e6
S
1842 {
1843 # invalid -> valid video id redirection
1844 'url': 'DJztXj2GPfl',
1845 'info_dict': {
1846 'id': 'DJztXj2GPfk',
1847 'ext': 'mp4',
1848 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1849 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1850 'upload_date': '20090125',
1851 'uploader': 'Prochorowka',
1852 'uploader_id': 'Prochorowka',
1853 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1854 'artist': 'Panjabi MC',
1855 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1856 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1857 },
1858 'params': {
1859 'skip_download': True,
1860 },
545cc85d 1861 'skip': 'Video unavailable',
ea74e00b
DP
1862 },
1863 {
1864 # empty description results in an empty string
1865 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1866 'info_dict': {
1867 'id': 'x41yOUIvK2k',
1868 'ext': 'mp4',
1869 'title': 'IMG 3456',
1870 'description': '',
1871 'upload_date': '20170613',
1872 'uploader_id': 'ElevageOrVert',
1873 'uploader': 'ElevageOrVert',
976ae3ea 1874 'view_count': int,
1875 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1876 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1877 'like_count': int,
1878 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1879 'tags': [],
1880 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1881 'availability': 'public',
1882 'age_limit': 0,
1883 'categories': ['Pets & Animals'],
1884 'duration': 7,
1885 'playable_in_embed': True,
1886 'live_status': 'not_live',
1887 'channel': 'ElevageOrVert',
6c73052c 1888 'channel_follower_count': int
ea74e00b
DP
1889 },
1890 'params': {
1891 'skip_download': True,
1892 },
1893 },
a0566bbf 1894 {
29f7c58a 1895 # with '};' inside yt initial data (see [1])
1896 # see [2] for an example with '};' inside ytInitialPlayerResponse
1897 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1898 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1899 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1900 'info_dict': {
1901 'id': 'CHqg6qOn4no',
1902 'ext': 'mp4',
1903 'title': 'Part 77 Sort a list of simple types in c#',
1904 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1905 'upload_date': '20130831',
1906 'uploader_id': 'kudvenkat',
1907 'uploader': 'kudvenkat',
976ae3ea 1908 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1909 'like_count': int,
1910 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1911 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1912 'live_status': 'not_live',
1913 'categories': ['Education'],
1914 'availability': 'public',
1915 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1916 'tags': 'count:12',
1917 'playable_in_embed': True,
1918 'age_limit': 0,
1919 'view_count': int,
1920 'duration': 522,
1921 'channel': 'kudvenkat',
6c73052c 1922 'channel_follower_count': int
a0566bbf 1923 },
1924 'params': {
1925 'skip_download': True,
1926 },
1927 },
29f7c58a 1928 {
1929 # another example of '};' in ytInitialData
1930 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1931 'only_matching': True,
1932 },
1933 {
1934 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1935 'only_matching': True,
1936 },
545cc85d 1937 {
cc2db878 1938 # https://github.com/ytdl-org/youtube-dl/pull/28094
1939 'url': 'OtqTfy26tG0',
1940 'info_dict': {
1941 'id': 'OtqTfy26tG0',
1942 'ext': 'mp4',
1943 'title': 'Burn Out',
1944 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1945 'upload_date': '20141120',
1946 'uploader': 'The Cinematic Orchestra - Topic',
1947 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1949 'artist': 'The Cinematic Orchestra',
1950 'track': 'Burn Out',
1951 'album': 'Every Day',
976ae3ea 1952 'like_count': int,
1953 'live_status': 'not_live',
1954 'alt_title': 'Burn Out',
1955 'duration': 614,
1956 'age_limit': 0,
1957 'view_count': int,
1958 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1959 'creator': 'The Cinematic Orchestra',
1960 'channel': 'The Cinematic Orchestra',
1961 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1962 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1963 'availability': 'public',
1964 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1965 'categories': ['Music'],
1966 'playable_in_embed': True,
6c73052c 1967 'channel_follower_count': int
cc2db878 1968 },
1969 'params': {
1970 'skip_download': True,
1971 },
545cc85d 1972 },
bc2ca1bb 1973 {
1974 # controversial video, only works with bpctr when authenticated with cookies
1975 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1976 'only_matching': True,
1977 },
a1a7907b 1978 {
1979 # controversial video, requires bpctr/contentCheckOk
1980 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1981 'info_dict': {
1982 'id': 'SZJvDhaSDnc',
1983 'ext': 'mp4',
1984 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1985 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1986 'uploader': 'CBS Mornings',
11f9be09 1987 'uploader_id': 'CBSThisMorning',
a1a7907b 1988 'upload_date': '20140716',
976ae3ea 1989 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1990 'duration': 170,
1991 'categories': ['News & Politics'],
1992 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1993 'view_count': int,
1994 'channel': 'CBS Mornings',
1995 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1996 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
1997 'age_limit': 18,
1998 'availability': 'needs_auth',
1999 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2000 'like_count': int,
2001 'live_status': 'not_live',
2002 'playable_in_embed': True,
6c73052c 2003 'channel_follower_count': int
a1a7907b 2004 }
2005 },
f7ad7160 2006 {
2007 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2008 'url': 'cBvYw8_A0vQ',
2009 'info_dict': {
2010 'id': 'cBvYw8_A0vQ',
2011 'ext': 'mp4',
2012 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2013 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2014 'upload_date': '20201120',
2015 'uploader': 'Walk around Japan',
2016 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2018 'duration': 1456,
2019 'categories': ['Travel & Events'],
2020 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2021 'view_count': int,
2022 'channel': 'Walk around Japan',
2023 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2024 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2025 'age_limit': 0,
2026 'availability': 'public',
2027 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2028 'live_status': 'not_live',
2029 'playable_in_embed': True,
6c73052c 2030 'channel_follower_count': int
f7ad7160 2031 },
2032 'params': {
2033 'skip_download': True,
2034 },
0fb983f6 2035 }, {
2036 # Has multiple audio streams
2037 'url': 'WaOKSUlf4TM',
2038 'only_matching': True
9297939e 2039 }, {
2040 # Requires Premium: has format 141 when requested using YTM url
2041 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2042 'only_matching': True
2043 }, {
120916da 2044 # multiple subtitles with same lang_code
2045 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2046 'only_matching': True,
109dd3b2 2047 }, {
2048 # Force use android client fallback
2049 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2050 'info_dict': {
2051 'id': 'YOelRv7fMxY',
11f9be09 2052 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2053 'ext': '3gp',
2054 'upload_date': '20210624',
2055 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2056 'uploader': 'colinfurze',
11f9be09 2057 'uploader_id': 'colinfurze',
109dd3b2 2058 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2059 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2060 'duration': 596,
2061 'categories': ['Entertainment'],
2062 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2063 'view_count': int,
2064 'channel': 'colinfurze',
2065 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2066 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2067 'age_limit': 0,
2068 'availability': 'public',
2069 'like_count': int,
2070 'live_status': 'not_live',
2071 'playable_in_embed': True,
6c73052c 2072 'channel_follower_count': int
109dd3b2 2073 },
2074 'params': {
2075 'format': '17', # 3gp format available on android
2076 'extractor_args': {'youtube': {'player_client': ['android']}},
2077 },
120916da 2078 },
109dd3b2 2079 {
2080 # Skip download of additional client configs (remix client config in this case)
2081 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2082 'only_matching': True,
2083 'params': {
2084 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2085 },
8fc54b12 2086 }, {
2087 # shorts
2088 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2089 'only_matching': True,
9222c381 2090 }, {
2091 'note': 'Storyboards',
2092 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2093 'info_dict': {
2094 'id': '5KLPxDtMqe8',
2095 'ext': 'mhtml',
2096 'format_id': 'sb0',
2097 'title': 'Your Brain is Plastic',
2098 'uploader_id': 'scishow',
2099 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2100 'upload_date': '20140324',
2101 'uploader': 'SciShow',
976ae3ea 2102 'like_count': int,
2103 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2104 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2105 'view_count': int,
2106 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2107 'playable_in_embed': True,
2108 'tags': 'count:12',
2109 'uploader_url': 'http://www.youtube.com/user/scishow',
2110 'availability': 'public',
2111 'channel': 'SciShow',
2112 'live_status': 'not_live',
2113 'duration': 248,
2114 'categories': ['Education'],
2115 'age_limit': 0,
6c73052c 2116 'channel_follower_count': int
9222c381 2117 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2118 }, {
2119 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2120 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2121 'info_dict': {
2122 'id': '2NUZ8W2llS4',
2123 'ext': 'mp4',
2124 'title': 'The NP that test your phone performance 🙂',
2125 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2126 'uploader': 'Leon Nguyen',
2127 'uploader_id': 'VNSXIII',
2128 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2129 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2130 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2131 'duration': 21,
2132 'view_count': int,
2133 'age_limit': 0,
2134 'categories': ['Gaming'],
2135 'tags': 'count:23',
2136 'playable_in_embed': True,
2137 'live_status': 'not_live',
2138 'upload_date': '20220103',
2139 'like_count': int,
2140 'availability': 'public',
2141 'channel': 'Leon Nguyen',
2142 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2143 'channel_follower_count': int
2144 }
2145 }, {
2146 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2147 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2148 'info_dict': {
2149 'id': 'mzZzzBU6lrM',
2150 'ext': 'mp4',
2151 'title': 'I Met GeorgeNotFound In Real Life...',
2152 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2153 'uploader': 'Quackity',
2154 'uploader_id': 'QuackityHQ',
2155 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2156 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2157 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2158 'duration': 955,
2159 'view_count': int,
2160 'age_limit': 0,
2161 'categories': ['Entertainment'],
2162 'tags': 'count:26',
2163 'playable_in_embed': True,
2164 'live_status': 'not_live',
2165 'release_timestamp': 1641172509,
2166 'release_date': '20220103',
2167 'upload_date': '20220103',
2168 'like_count': int,
2169 'availability': 'public',
2170 'channel': 'Quackity',
2171 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2172 'channel_follower_count': int
2173 }
2174 },
2175 { # continuous livestream. Microformat upload date should be preferred.
2176 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2177 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2178 'info_dict': {
2179 'id': 'kgx4WGK0oNU',
2180 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2181 'ext': 'mp4',
2182 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2183 'availability': 'public',
2184 'age_limit': 0,
2185 'release_timestamp': 1637975704,
2186 'upload_date': '20210619',
2187 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2188 'live_status': 'is_live',
2189 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2190 'uploader': '阿鲍Abao',
2191 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2192 'channel': 'Abao in Tokyo',
2193 'channel_follower_count': int,
2194 'release_date': '20211127',
2195 'tags': 'count:39',
2196 'categories': ['People & Blogs'],
2197 'like_count': int,
2198 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2199 'view_count': int,
2200 'playable_in_embed': True,
2201 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2202 },
2203 'params': {'skip_download': True}
6e634cbe 2204 }, {
2205 # Story. Requires specific player params to work.
2206 # Note: stories get removed after some period of time
2207 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
2208 'info_dict': {
2209 'id': 'yN3x1t3sieA',
2210 'ext': 'mp4',
2211 'uploader': 'Linus Tech Tips',
2212 'duration': 13,
2213 'channel': 'Linus Tech Tips',
2214 'playable_in_embed': True,
2215 'tags': [],
2216 'age_limit': 0,
2217 'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
2218 'upload_date': '20220402',
2219 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
2220 'title': 'Story',
2221 'live_status': 'not_live',
2222 'uploader_id': 'LinusTechTips',
2223 'view_count': int,
2224 'description': '',
2225 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
2226 'categories': ['Science & Technology'],
2227 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
2228 'availability': 'unlisted',
2229 }
2230 }
2eb88d95
PH
2231 ]
2232
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty first ``list`` value is
    rejected (``False``); every other URL is decided by the parent class's
    ``suitable``.
    """
    # NOTE(review): parse_qs is also named in the module-level import list;
    # this function-scope import is presumably deliberate -- confirm before
    # removing it.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
201c1459 2241
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Both caches start out as empty dicts; they are only created here.
    self._code_cache, self._player_cache = {}, {}
e0df6211 2246
adbc4ec4 2247 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
2248 lock = threading.Lock()
2249
2250 is_live = True
185bf310 2251 start_time = time.time()
adbc4ec4
THD
2252 formats = [f for f in formats if f.get('is_from_start')]
2253
185bf310 2254 def refetch_manifest(format_id, delay):
2255 nonlocal formats, start_time, is_live
2256 if time.time() <= start_time + delay:
adbc4ec4
THD
2257 return
2258
2259 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2260 video_details = traverse_obj(
2261 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2262 microformats = traverse_obj(
2263 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2264 expected_type=dict, default=[])
2265 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 2266 start_time = time.time()
adbc4ec4 2267
185bf310 2268 def mpd_feed(format_id, delay):
adbc4ec4
THD
2269 """
2270 @returns (manifest_url, manifest_stream_number, is_live) or None
2271 """
2272 with lock:
185bf310 2273 refetch_manifest(format_id, delay)
adbc4ec4
THD
2274
2275 f = next((f for f in formats if f['format_id'] == format_id), None)
2276 if not f:
185bf310 2277 if not is_live:
2278 self.to_screen(f'{video_id}: Video is no longer live')
2279 else:
2280 self.report_warning(
2281 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2282 return None
2283 return f['manifest_url'], f['manifest_stream_number'], is_live
2284
2285 for f in formats:
a539f065 2286 f['is_live'] = True
adbc4ec4
THD
2287 f['protocol'] = 'http_dash_segments_generator'
2288 f['fragments'] = functools.partial(
2289 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2290
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generate fragment dicts ({'url': ...}) for a live DASH format until the
    stream is reported as no longer live or too many fetches yield nothing.

    @param format_id        format id passed through to mpd_feed
    @param live_start_time  start time of the live stream (may be None)
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' is read and 'last_error' is popped from it
    """
    # MAX_DURATION is in seconds (432000 s == the 120 hours named in the warning)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the first call of _extract_sequence_from_mpd: that
    # helper returns `last_seq` as a free variable on its failure paths, which
    # would otherwise raise NameError when the very first manifest fetch fails
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest URL through mpd_feed and, if needed, re-read the manifest

        @returns (should_continue, sequence number)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        # When the feed returns nothing, keep the previous URL and mark the stream as ended
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only used to leave the for loop and
                    # restart the while loop; it is caught just below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of FETCH_SPAN seconds since this iteration began
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2391
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the player JS URL found in the given ytcfgs, joined onto
    https://www.youtube.com, or None when no ytcfg provides one.

    The `webpage` argument is accepted but not read by this method.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None
109dd3b2 2399
def _download_player_url(self, video_id, fatal=False):
    """
    Download the iframe API script and build the base.js player URL from the
    8-hex-digit player version found in it. Returns None if either step fails
    (with fatal=False).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2409
60064c53
PH
2410 def _signature_cache_id(self, example_sig):
2411 """ Return a string representation of a signature """
78caa52a 2412 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 2413
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in cls._PLAYER_INFO_RE that
    matches player_url.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns are tried in order and searching stops at the first hit
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next(filter(None, attempts), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
e40c758c 2423
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for player_url, downloading it only when the
    player id is not yet in self._code_cache. Returns None if nothing was
    downloaded and nothing is cached.
    """
    player_id = self._extract_player_info(player_url)
    already_cached = player_id in self._code_cache
    if not already_cached:
        js_code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # Only successful downloads are remembered
        if js_code:
            self._code_cache[player_id] = js_code
    return self._code_cache.get(player_id)
109dd3b2 2434
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that transforms a signature string.

    A cached index list from the 'youtube-sigfuncs' cache is used when present;
    otherwise the function is parsed from the player JS and its index list is
    stored in that cache. Returns None if no player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Feed a string whose characters are their own indices; the code points of
    # the output then describe which input index ends up at each position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 2456
60064c53 2457 def _print_sig_code(self, func, example_sig):
404f611f 2458 if not self.get_param('youtube_print_sig_code'):
2459 return
2460
edf3e38e
PH
2461 def gen_sig_code(idxs):
2462 def _genslice(start, end, step):
78caa52a 2463 starts = '' if start == 0 else str(start)
8bcc8756 2464 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2465 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2466 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2467
2468 step = None
7af808a5
PH
2469 # Quelch pyflakes warnings - start will be set when step is set
2470 start = '(Never used)'
edf3e38e
PH
2471 for i, prev in zip(idxs[1:], idxs[:-1]):
2472 if step is not None:
2473 if i - prev == step:
2474 continue
2475 yield _genslice(start, prev, step)
2476 step = None
2477 continue
2478 if i - prev in [-1, 1]:
2479 step = i - prev
2480 start = prev
2481 continue
2482 else:
78caa52a 2483 yield 's[%d]' % prev
edf3e38e 2484 if step is None:
78caa52a 2485 yield 's[%d]' % i
edf3e38e
PH
2486 else:
2487 yield _genslice(start, i, step)
2488
78caa52a 2489 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 2490 cache_res = func(test_string)
edf3e38e 2491 cache_spec = [ord(c) for c in cache_res]
78caa52a 2492 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
2493 signature_id_tuple = '(%s)' % (
2494 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2495 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2496 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2497 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2498
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a callable
    that runs it on a single string argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    sig_function = interpreter.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: sig_function([s])
2522
def _decrypt_signature(self, s, video_id, player_url):
    """
    Turn the encrypted s field into a working signature

    The signature function is cached in self._player_cache per
    (player_url, signature layout). Any failure is re-raised as an
    ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2541
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into its decrypted value

    Both the n function (per player URL) and each decrypted value (per input)
    are cached in self._player_cache. Failures are re-raised as ExtractorError.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_function = self._player_cache[func_key]
        self._player_cache[value_key] = n_function(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2562
def _extract_n_function_name(self, jscode):
    """
    Return the name of the n function referenced in the player JS.

    When the reference is an indexed lookup (name[idx]), the array assigned to
    `var name` is parsed and the element at idx is returned instead.
    """
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    array_literal = self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')
    return json.loads(js_to_json(array_literal))[int(idx)]
404f611f 2572
def _extract_n_function(self, video_id, player_url):
    """
    Return a callable applying the player's n function to a single string.

    The function code is taken from the 'youtube-nsig' cache when available;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function object is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2590
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' key of ytcfg is preferred; otherwise the value is searched for
    in the player JS. Without a player_url this warns (or raises if fatal)
    and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2614
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URL from the player responses, with 'ver'
    and a generated 'cpn' set in its query. Warns and returns if no such
    URL is found.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    tracking_url = compat_urlparse.urlunparse(
        parsed._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2640
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Return a list of YouTube embeds found in `webpage`.

    Entries are embed URLs for iframe/embed/object/SWFObject style embeds and
    the raw attribute values for lazyYT and "YouTube Video Importer" markup.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' and so never matched `embedSWF("url", ...)`
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group holds the video id
    entries.extend(m[-1] for m in matches)

    return entries
2672
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if there is none"""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2677
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return the 'id' group of cls._VALID_URL matched against url.

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2684
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer found in `data`"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        # timeRangeStartMillis is divided by 1000 via scale
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)
2699
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro-markers lists of the engagement panels in
    `data`; the first list that yields any chapters wins, otherwise [] is returned.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2715
2716 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2717 chapters = []
7c365c21 2718 last_chapter = {'start_time': 0}
2719 for idx, chapter in enumerate(chapter_list or []):
2720 title = chapter_title(chapter)
84213ea8
S
2721 start_time = chapter_time(chapter)
2722 if start_time is None:
2723 continue
7c365c21 2724 last_chapter['end_time'] = start_time
2725 if start_time < last_chapter['start_time']:
2726 if idx == 1:
2727 chapters.pop()
2728 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2729 else:
2730 self.report_warning(f'Invalid start time for chapter "{title}"')
2731 continue
2732 last_chapter = {'start_time': start_time, 'title': title}
2733 chapters.append(last_chapter)
2734 last_chapter['end_time'] = duration
84213ea8
S
2735 return chapters
2736
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search `webpage` for `regex` (first followed by the initial-data boundary
    pattern, then on its own) and parse the match as JSON; '{}' is parsed when
    nothing matches.
    """
    patterns = (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2741
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """
    Build a comment dict from a comment renderer.

    Returns None when the renderer has no 'commentId'. 'parent' is set to the
    given parent id, or 'root' when there is none.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def lookup(getter, expected_type=None):
        return try_get(comment_renderer, getter, expected_type)

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_text = lookup(
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), compat_str)
    action_buttons = lookup(
        lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': lookup(
            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str),
        'author_thumbnail': lookup(
            lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str),
        'author_is_uploader': lookup(lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2776
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator yielding comment dicts (as built by _extract_comment), following
    continuations page by page starting from root_continuation_data.

    @param root_continuation_data  renderer data the first continuation is extracted from
    @param ytcfg                   passed to the API header/response helpers
    @param video_id                used to generate a continuation when none is found
    @param parent                  id of the parent comment when fetching replies, else None
    @param tracker                 dict of counters shared across recursive calls;
                                   created here when not given
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the selected sort order (or None), and
        # records the expected comment count in the tracker along the way
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the consuming loop below
                # treats as the signal to stop the whole extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                # Recurse with this comment as parent, sharing the same tracker
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-integer values fall back to sys.maxsize; padding with ''
    # guarantees four values. max_comments is not used further in this method
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        # `response` is None on the first page and the previous page's response afterwards
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # The key check is only skipped for the first request of a generated continuation
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only used for the header (count and sort continuation)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None from extract_thread: the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface a message from the root data only for a top-level call that produced no comments
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
2922
2923 @staticmethod
2924 def _generate_comment_continuation(video_id):
2925 """
2926 Generates initial comment section continuation token from given video id
2927 """
2928 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
2929 return base64.b64encode(token.encode()).decode()
2930
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    renderer in `contents`, limited to the first max_comments value (if any).
    `webpage` is not read here.
    """
    def iter_comments():
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for section in sections:
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)
a1c5d2ca 2941
109dd3b2 2942 @staticmethod
99e9e001 2943 def _get_checkok_params():
2944 return {'contentCheckOk': True, 'racyCheckOk': True}
2945
2946 @classmethod
2947 def _generate_player_context(cls, sts=None):
109dd3b2 2948 context = {
2949 'html5Preference': 'HTML5_PREF_WANTS',
2950 }
2951 if sts is not None:
2952 context['signatureTimestamp'] = sts
2953 return {
2954 'playbackContext': {
2955 'contentPlaybackContext': context
a1a7907b 2956 },
99e9e001 2957 **cls._get_checkok_params()
109dd3b2 2958 }
2959
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Return True if the playability status carries a legacy age gate reason,
    or if its status/reason contains one of the known age gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
2971
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2975
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Request the 'player' endpoint for video_id as the given client and return
    the response, or None if it is empty.

    The signature timestamp is only looked up when a player_url is given.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    )
    return response or None
2995
def _get_requested_clients(self, url, smuggled_data):
    """
    Return the de-duplicated, ordered list of player clients to use.

    Clients come from the 'player_client' extractor argument ('default' and
    'all' are expanded, unknown names are skipped with a warning) and fall
    back to the default list. For music URLs the '<client>_music' variants
    that exist in INNERTUBE_CLIENTS are appended.
    """
    default = ['android', 'web']
    # Clients whose name starts with an underscore are not selectable
    allowed_clients = [name for name in INNERTUBE_CLIENTS if not name.startswith('_')]
    allowed_clients.sort(key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The generator reads the very list being extended, as in the original code
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 3019
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Fetch a player response from each requested client.

    Clients are tried in the order given. Depending on a response, extra
    clients (creator / embedded variants) may be queued on the fly.

    Returns a tuple ``(player_responses, player_url)``.
    If a client raised ExtractorError, the last such error is re-raised
    when no response at all was collected; otherwise it is only reported
    as a warning.
    """
    initial_pr = (
        self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
        if webpage else None)

    seen_clients = set(clients)
    # Used as a stack; reversed so that pop() follows the requested order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Queue the first client name that exists but not already used """
        for name in client_names:
            actual_client = _split_innertube_client(name)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in seen_clients:
                continue
            pending.append(name)
            seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        is_web = client == 'web'

        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player, player_url = False, None

        # The fallback download of the player URL is attempted at most once
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url
11f9be09 3095
def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield a format dict for every usable stream in `streaming_data`.

    Direct ("https") formats listed under `formats` / `adaptiveFormats` are
    yielded first, followed by the formats of the HLS and DASH manifests
    referenced by `hlsManifestUrl` / `dashManifestUrl`.

    @param streaming_data  Iterable of streamingData dicts
    @param video_id        Used for messages and signature decryption
    @param player_url      URL of the JS player; formats that need signature
                           decryption are skipped when it is not available
    @param is_live         Whether the video is currently live
    @param duration        Video duration in seconds (may be None); used to
                           detect formats that are much shorter than the video
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value of audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        # (milliseconds / seconds < 500 <=> the format is shorter than half the video)
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so that the value returned by _configuration_arg is never mutated
    skip_manifests = set(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.add('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag was already seen with another protocol; disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality seen for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 3266
def _extract_storyboard(self, player_responses, duration):
    """Yield one "mhtml" storyboard format per level of the storyboard spec.

    The spec is a "|"-separated string: the first item is the base URL
    (containing the `$L`, `$N` and `$M` placeholders) and every following
    item describes one level as 8 "#"-separated fields, of which the first
    five must be non-zero integers (width, height, frame count, columns,
    rows) and the last two are the `$N` value and the `sigh` parameter.

    Nothing is yielded when there is no usable base URL or when `duration`
    is None, since the per-fragment durations cannot be computed then.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if duration is None:
        # Would otherwise fail with TypeError on `duration / fragment_count`
        return
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is iterated back to front, hence the level is counted down
        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3301
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns a tuple ``(webpage, master_ytcfg, player_responses, player_url)``.
    `webpage` is None when `webpage` is listed in the `player_skip`
    extractor argument; a failed download is non-fatal (`fatal=False`).
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3315
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and collect all formats of the video.

    Returns a tuple
    ``(live_broadcast_details, is_live, streaming_data, formats)``.
    `is_live` is taken from `isLive` of the video details and, only when
    that is None, from `isLiveNow` of the live broadcast details.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; exhaust it into a list
    format_list = [*self._extract_formats(all_streaming_data, video_id, player_url, live_now, duration)]

    return broadcast_details, live_now, all_streaming_data, format_list
3326
3327 def _real_extract(self, url):
3328 url, smuggled_data = unsmuggle_url(url, {})
3329 video_id = self._match_id(url)
3330
3331 base_url = self.http_scheme() + '//www.youtube.com/'
3332 webpage_url = base_url + 'watch?v=' + video_id
3333
3334 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3335
11f9be09 3336 playability_statuses = traverse_obj(
3337 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3338
3339 trailer_video_id = get_first(
3340 playability_statuses,
3341 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3342 expected_type=str)
3343 if trailer_video_id:
3344 return self.url_result(
3345 trailer_video_id, self.ie_key(), trailer_video_id)
3346
3347 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3348 if webpage else (lambda x: None))
3349
3350 video_details = traverse_obj(
3351 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3352 microformats = traverse_obj(
3353 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3354 expected_type=dict, default=[])
3355 video_title = (
3356 get_first(video_details, 'title')
3357 or self._get_text(microformats, (..., 'title'))
3358 or search_meta(['og:title', 'twitter:title', 'title']))
3359 video_description = get_first(video_details, 'shortDescription')
3360
d89257f3 3361 multifeed_metadata_list = get_first(
3362 player_responses,
3363 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3364 expected_type=str)
3365 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3366 if self.get_param('noplaylist'):
11f9be09 3367 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3368 else:
3369 entries = []
3370 feed_ids = []
3371 for feed in multifeed_metadata_list.split(','):
3372 # Unquote should take place before split on comma (,) since textual
3373 # fields may contain comma as well (see
3374 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3375 feed_data = compat_parse_qs(
3376 compat_urllib_parse_unquote_plus(feed))
3377
3378 def feed_entry(name):
3379 return try_get(
3380 feed_data, lambda x: x[name][0], compat_str)
3381
3382 feed_id = feed_entry('id')
3383 if not feed_id:
3384 continue
3385 feed_title = feed_entry('title')
3386 title = video_title
3387 if feed_title:
3388 title += ' (%s)' % feed_title
3389 entries.append({
3390 '_type': 'url_transparent',
3391 'ie_key': 'Youtube',
3392 'url': smuggle_url(
3393 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3394 {'force_singlefeed': True}),
3395 'title': title,
3396 })
3397 feed_ids.append(feed_id)
3398 self.to_screen(
3399 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3400 % (', '.join(feed_ids), video_id))
3401 return self.playlist_result(
3402 entries, video_id, video_title, video_description)
11f9be09 3403
a1b2d843 3404 duration = int_or_none(
3405 get_first(video_details, 'lengthSeconds')
3406 or get_first(microformats, 'lengthSeconds')
3407 or parse_duration(search_meta('duration'))) or None
3408
3409 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
3410 video_id, microformats, video_details, player_responses, player_url, duration)
bf1317d2 3411
545cc85d 3412 if not formats:
11f9be09 3413 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3414 self.report_drm(video_id)
11f9be09 3415 pemr = get_first(
3416 playability_statuses,
3417 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3418 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3419 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3420 if subreason:
545cc85d 3421 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3422 countries = get_first(microformats, 'availableCountries')
545cc85d 3423 if not countries:
3424 regions_allowed = search_meta('regionsAllowed')
3425 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3426 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3427 reason += f'. {subreason}'
545cc85d 3428 if reason:
b7da73eb 3429 self.raise_no_formats(reason, expected=True)
bf1317d2 3430
11f9be09 3431 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3432 if not keywords and webpage:
3433 keywords = [
3434 unescapeHTML(m.group('content'))
3435 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3436 for keyword in keywords:
3437 if keyword.startswith('yt:stretch='):
201c1459 3438 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3439 if mobj:
3440 # NB: float is intentional for forcing float division
3441 w, h = (float(v) for v in mobj.groups())
3442 if w > 0 and h > 0:
3443 ratio = w / h
3444 for f in formats:
3445 if f.get('vcodec') != 'none':
3446 f['stretched_ratio'] = ratio
3447 break
a709d873 3448 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3449 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3450 if thumbnail_url:
3451 thumbnails.append({
3452 'url': thumbnail_url,
ff2751ac 3453 })
fccf5021 3454 original_thumbnails = thumbnails.copy()
3455
0ba692ac 3456 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3457 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3458 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3459 thumbnail_names = [
bfec31be 3460 # While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants
3461 # in resolution, these are not the custom thumbnail. So de-prioritize them
3462 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3463 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3464 ]
cca80fe6 3465 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3466 thumbnails.extend({
3467 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3468 video_id=video_id, name=name, ext=ext,
3469 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3470 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3471 for thumb in thumbnails:
cca80fe6 3472 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3473 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3474 self._remove_duplicate_formats(thumbnails)
fccf5021 3475 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3476
7ea65411 3477 category = get_first(microformats, 'category') or search_meta('genre')
3478 channel_id = str_or_none(
3479 get_first(video_details, 'channelId')
3480 or get_first(microformats, 'externalChannelId')
3481 or search_meta('channelId'))
7ea65411 3482 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3483
3484 live_content = get_first(video_details, 'isLiveContent')
3485 is_upcoming = get_first(video_details, 'isUpcoming')
3486 if is_live is None:
3487 if is_upcoming or live_content is False:
3488 is_live = False
3489 if is_upcoming is None and (live_content or is_live):
3490 is_upcoming = False
adbc4ec4
THD
3491 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3492 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3493 if not duration and live_end_time and live_start_time:
3494 duration = live_end_time - live_start_time
3495
3496 if is_live and self.get_param('live_from_start'):
3497 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3498
720c3099 3499 formats.extend(self._extract_storyboard(player_responses, duration))
3500
3501 # Source is given priority since formats that throttle are given lower source_preference
3502 # When throttling issue is fully fixed, remove this
3503 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3504
545cc85d 3505 info = {
3506 'id': video_id,
39ca3b5c 3507 'title': video_title,
545cc85d 3508 'formats': formats,
3509 'thumbnails': thumbnails,
fccf5021 3510 # The best thumbnail that we are sure exists. Prevents unnecessary
3511 # URL checking if user don't care about getting the best possible thumbnail
3512 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3513 'description': video_description,
11f9be09 3514 'uploader': get_first(video_details, 'author'),
545cc85d 3515 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3516 'uploader_url': owner_profile_url,
3517 'channel_id': channel_id,
e0ddbd02 3518 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
545cc85d 3519 'duration': duration,
3520 'view_count': int_or_none(
11f9be09 3521 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3522 or search_meta('interactionCount')),
11f9be09 3523 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3524 'age_limit': 18 if (
11f9be09 3525 get_first(microformats, 'isFamilySafe') is False
545cc85d 3526 or search_meta('isFamilyFriendly') == 'false'
3527 or search_meta('og:restrictions:age') == '18+') else 0,
3528 'webpage_url': webpage_url,
3529 'categories': [category] if category else None,
3530 'tags': keywords,
11f9be09 3531 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3532 'is_live': is_live,
3533 'was_live': (False if is_live or is_upcoming or live_content is False
3534 else None if is_live is None or is_upcoming is None
3535 else live_content),
3536 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3537 'release_timestamp': live_start_time,
545cc85d 3538 }
b477fc13 3539
3944e7af 3540 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3541 if pctr:
ecdc9049 3542 def get_lang_code(track):
3543 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3544 or track.get('languageCode'))
3545
3546 # Converted into dicts to remove duplicates
3547 captions = {
3548 get_lang_code(sub): sub
3549 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3550 translation_languages = {
3551 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3552 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3553
774d79cc 3554 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3555 lang_subs = container.setdefault(lang_code, [])
545cc85d 3556 for fmt in self._SUBTITLE_FORMATS:
3557 query.update({
3558 'fmt': fmt,
3559 })
3560 lang_subs.append({
3561 'ext': fmt,
60f393e4 3562 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3563 'name': sub_name,
545cc85d 3564 })
7e72694b 3565
ecdc9049 3566 subtitles, automatic_captions = {}, {}
3567 for lang_code, caption_track in captions.items():
3568 base_url = caption_track.get('baseUrl')
1235d333 3569 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3570 if not base_url:
3571 continue
ecdc9049 3572 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3573 if caption_track.get('kind') != 'asr':
545cc85d 3574 if not lang_code:
3575 continue
3576 process_language(
ecdc9049 3577 subtitles, base_url, lang_code, lang_name, {})
3578 if not caption_track.get('isTranslatable'):
3579 continue
3944e7af 3580 for trans_code, trans_name in translation_languages.items():
3581 if not trans_code:
545cc85d 3582 continue
1235d333 3583 orig_trans_code = trans_code
ecdc9049 3584 if caption_track.get('kind') != 'asr':
18e49408 3585 if 'translated_subs' in self._configuration_arg('skip'):
3586 continue
ecdc9049 3587 trans_code += f'-{lang_code}'
3588 trans_name += format_field(lang_name, template=' from %s')
d49669ac 3589 # Add an "-orig" label to the original language so that it can be distinguished.
3590 # The subs are returned without "-orig" as well for compatibility
1235d333 3591 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3592 process_language(
d49669ac 3593 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3594 # Setting tlang=lang returns damaged subtitles.
d49669ac 3595 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3596 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
ecdc9049 3597 info['automatic_captions'] = automatic_captions
3598 info['subtitles'] = subtitles
7e72694b 3599
545cc85d 3600 parsed_url = compat_urllib_parse_urlparse(url)
3601 for component in [parsed_url.fragment, parsed_url.query]:
3602 query = compat_parse_qs(component)
3603 for k, v in query.items():
3604 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3605 d_k += '_time'
3606 if d_k not in info and k in s_ks:
3607 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3608
3609 # Youtube Music Auto-generated description
822b9d9c 3610 if video_description:
38d70284 3611 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3612 if mobj:
822b9d9c
RA
3613 release_year = mobj.group('release_year')
3614 release_date = mobj.group('release_date')
3615 if release_date:
3616 release_date = release_date.replace('-', '')
3617 if not release_year:
545cc85d 3618 release_year = release_date[:4]
3619 info.update({
3620 'album': mobj.group('album'.strip()),
3621 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3622 'track': mobj.group('track').strip(),
3623 'release_date': release_date,
cc2db878 3624 'release_year': int_or_none(release_year),
545cc85d 3625 })
7e72694b 3626
545cc85d 3627 initial_data = None
3628 if webpage:
3629 initial_data = self._extract_yt_initial_variable(
3630 webpage, self._YT_INITIAL_DATA_RE, video_id,
3631 'yt initial data')
3632 if not initial_data:
99e9e001 3633 query = {'videoId': video_id}
3634 query.update(self._get_checkok_params())
109dd3b2 3635 initial_data = self._extract_response(
3636 item_id=video_id, ep='next', fatal=False,
99e9e001 3637 ytcfg=master_ytcfg, query=query,
3638 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3639 note='Downloading initial data API JSON')
545cc85d 3640
19a03940 3641 try: # This will error if there is no livechat
c60ee3a2 3642 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3643 except (KeyError, IndexError, TypeError):
3644 pass
3645 else:
ecdc9049 3646 info.setdefault('subtitles', {})['live_chat'] = [{
19a03940 3647 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
c60ee3a2 3648 'video_id': video_id,
3649 'ext': 'json',
f6745c49 3650 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3651 }]
545cc85d 3652
3653 if initial_data:
7c365c21 3654 info['chapters'] = (
3655 self._extract_chapters_from_json(initial_data, duration)
3656 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3657 or None)
545cc85d 3658
17322130 3659 contents = traverse_obj(
3660 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3661 expected_type=list, default=[])
3662
3663 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3664 if vpir:
3665 stl = vpir.get('superTitleLink')
3666 if stl:
3667 stl = self._get_text(stl)
3668 if try_get(
3669 vpir,
3670 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3671 info['location'] = stl
3672 else:
affc4fef 3673 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3674 if mobj:
545cc85d 3675 info.update({
17322130 3676 'series': mobj.group(1),
3677 'season_number': int(mobj.group(2)),
3678 'episode_number': int(mobj.group(3)),
545cc85d 3679 })
17322130 3680 for tlb in (try_get(
3681 vpir,
3682 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3683 list) or []):
3684 tbr = tlb.get('toggleButtonRenderer') or {}
3685 for getter, regex in [(
3686 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3687 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3688 lambda x: x['accessibility'],
3689 lambda x: x['accessibilityData']['accessibilityData'],
3690 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3691 label = (try_get(tbr, getter, dict) or {}).get('label')
3692 if label:
3693 mobj = re.match(regex, label)
3694 if mobj:
3695 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3696 break
17322130 3697 sbr_tooltip = try_get(
3698 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3699 if sbr_tooltip:
3700 like_count, dislike_count = sbr_tooltip.split(' / ')
3701 info.update({
3702 'like_count': str_to_int(like_count),
3703 'dislike_count': str_to_int(dislike_count),
3704 })
3705 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3706 if vsir:
3707 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3708 info.update({
3709 'channel': self._get_text(vor, 'title'),
3710 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3711
3712 rows = try_get(
3713 vsir,
3714 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3715 list) or []
3716 multiple_songs = False
3717 for row in rows:
3718 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3719 multiple_songs = True
3720 break
3721 for row in rows:
3722 mrr = row.get('metadataRowRenderer') or {}
3723 mrr_title = mrr.get('title')
3724 if not mrr_title:
3725 continue
3726 mrr_title = self._get_text(mrr, 'title')
3727 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3728 if mrr_title == 'License':
3729 info['license'] = mrr_contents_text
3730 elif not multiple_songs:
3731 if mrr_title == 'Album':
3732 info['album'] = mrr_contents_text
3733 elif mrr_title == 'Artist':
3734 info['artist'] = mrr_contents_text
3735 elif mrr_title == 'Song':
3736 info['track'] = mrr_contents_text
545cc85d 3737
3738 fallbacks = {
3739 'channel': 'uploader',
3740 'channel_id': 'uploader_id',
3741 'channel_url': 'uploader_url',
3742 }
992f9a73 3743
17322130 3744 # The upload date for scheduled, live and past live streams / premieres in microformats
3745 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3746 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3747 upload_date = (
3748 unified_strdate(get_first(microformats, 'uploadDate'))
3749 or unified_strdate(search_meta('uploadDate')))
3750 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3751 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3752 info['upload_date'] = upload_date
992f9a73 3753
545cc85d 3754 for to, frm in fallbacks.items():
3755 if not info.get(to):
3756 info[to] = info.get(frm)
3757
3758 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3759 v = info.get(s_k)
3760 if v:
3761 info[d_k] = v
b84071c0 3762
11f9be09 3763 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3764 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3765 is_membersonly = None
b28f8d24 3766 is_premium = None
c224251a
M
3767 if initial_data and is_private is not None:
3768 is_membersonly = False
b28f8d24 3769 is_premium = False
47193e02 3770 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3771 badge_labels = set()
3772 for content in contents:
3773 if not isinstance(content, dict):
3774 continue
3775 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3776 for badge_label in badge_labels:
3777 if badge_label.lower() == 'members only':
3778 is_membersonly = True
3779 elif badge_label.lower() == 'premium':
3780 is_premium = True
3781 elif badge_label.lower() == 'unlisted':
3782 is_unlisted = True
c224251a 3783
c224251a
M
3784 info['availability'] = self._availability(
3785 is_private=is_private,
b28f8d24 3786 needs_premium=is_premium,
c224251a
M
3787 needs_subscription=is_membersonly,
3788 needs_auth=info['age_limit'] >= 18,
3789 is_unlisted=None if is_private is None else is_unlisted)
3790
a2160aa4 3791 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3792
11f9be09 3793 self.mark_watched(video_id, player_responses)
d77ab8e2 3794
545cc85d 3795 return info
c5e8d7af 3796
a61fd4cf 3797
a6213a49 3798class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3799
@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorate an extraction method so smuggled data is passed on to its entries.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with
    the un-smuggled URL. When any smuggled data is present, the 'url' of every
    entry in the returned info dict is re-smuggled with that same data.
    """

    def _resmuggle(playlist_entries, payload):
        # Lazily rewrite each entry so the entries stay a generator
        for item in playlist_entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        # Only look at the entries when there is something to smuggle
        entries = result.get('entries') if payload else None
        if entries:
            result['entries'] = _resmuggle(entries, payload)
        return result

    return wrapper
3819
a6213a49 3820 def _extract_channel_id(self, webpage):
3821 channel_id = self._html_search_meta(
3822 'channelId', webpage, 'channel id', default=None)
3823 if channel_id:
3824 return channel_id
3825 channel_url = self._html_search_meta(
3826 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3827 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3828 'twitter:app:url:googleplay'), webpage, 'channel url')
3829 return self._search_regex(
3830 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3831 channel_url, 'channel id')
15f6397c 3832
8bdd16b4 3833 @staticmethod
cd7c66cf 3834 def _extract_basic_item_renderer(item):
3835 # Modified from _extract_grid_item_renderer
201c1459 3836 known_basic_renderers = (
a17526e4 3837 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 3838 )
3839 for key, renderer in item.items():
201c1459 3840 if not isinstance(renderer, dict):
cd7c66cf 3841 continue
201c1459 3842 elif key in known_basic_renderers:
3843 return renderer
3844 elif key.startswith('grid') and key.endswith('Renderer'):
3845 return renderer
8bdd16b4 3846
def _grid_entries(self, grid_renderer):
    """
    Yield one result per usable item in ``grid_renderer['items']``.

    Each item is classified in priority order as playlist, video, channel or,
    failing those, by the URL of its navigation endpoint. Items that match
    none of these yield nothing.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # The first extractor (in this order) that accepts the URL handles it
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3886
def _music_reponsive_list_entry(self, renderer):
    """
    Turn a music list item renderer into a music.youtube.com url result.

    The item is resolved as a video, else as a playlist (optionally anchored
    at a video), else as a browse page. Returns None when no id is found.
    """
    def lookup(*path):
        return traverse_obj(renderer, path)

    video_id = lookup('playlistItemData', 'videoId')
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = lookup('navigationEndpoint', 'watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = lookup('navigationEndpoint', 'watchEndpoint', 'videoId')
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = lookup('navigationEndpoint', 'browseEndpoint', 'browseId')
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
3904
3d3dddc9 3905 def _shelf_entries_from_content(self, shelf_renderer):
3906 content = shelf_renderer.get('content')
3907 if not isinstance(content, dict):
8bdd16b4 3908 return
cd7c66cf 3909 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3910 if renderer:
3911 # TODO: add support for nested playlists so each shelf is processed
3912 # as separate playlist
3913 # TODO: this includes only first N items
86e5f3ed 3914 yield from self._grid_entries(renderer)
3d3dddc9 3915 renderer = content.get('horizontalListRenderer')
3916 if renderer:
3917 # TODO
3918 pass
8bdd16b4 3919
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url result for the shelf's own URL (if any), then its inline entries.

    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3935
8bdd16b4 3936 def _playlist_entries(self, video_list_renderer):
3937 for content in video_list_renderer['contents']:
3938 if not isinstance(content, dict):
3939 continue
3940 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3941 if not isinstance(renderer, dict):
3942 continue
3943 video_id = renderer.get('videoId')
3944 if not video_id:
3945 continue
3946 yield self._extract_video(renderer)
07aeced6 3947
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at content.videoRenderer, if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3955
8bdd16b4 3956 def _video_entry(self, video_renderer):
3957 video_id = video_renderer.get('videoId')
3958 if video_id:
3959 return self._extract_video(video_renderer)
dacb3a86 3960
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the tile's tap target, or None if it has no URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
3967
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video URL
    linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4003
8bdd16b4 4004 def _post_thread_continuation_entries(self, post_thread_continuation):
4005 contents = post_thread_continuation.get('contents')
4006 if not isinstance(contents, list):
4007 return
4008 for content in contents:
4009 renderer = content.get('backstagePostThreadRenderer')
4010 if not isinstance(renderer, dict):
4011 continue
86e5f3ed 4012 yield from self._post_thread_entries(renderer)
07aeced6 4013
39ed931e 4014 r''' # unused
4015 def _rich_grid_entries(self, contents):
4016 for content in contents:
4017 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4018 if video_renderer:
4019 entry = self._video_entry(video_renderer)
4020 if entry:
4021 yield entry
4022 '''
52efa4b3 4023
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in ``parent_renderer['contents']``.

    @param continuation_list: single-element list used as an out-parameter;
                              it is reset on entry and its only element is
                              set to the continuation found while iterating
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        section_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for inner in try_get(section_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(inner, dict):
                continue
            # Only the first recognised renderer of each inner item is used
            matched = next(
                ((key, value) for key, value in inner.items() if key in handlers), None)
            if matched is None:
                continue
            key, renderer = matched
            # Handlers may produce None for unusable items; drop falsy entries
            yield from filter(None, handlers[key](renderer))
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4072
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.

    The entries embedded in the tab's 'sectionListRenderer' or
    'richGridRenderer' are yielded first. Then, for as long as a continuation
    is available, the next page is requested and its entries are yielded.

    @param tab: the selected tab renderer; nothing is yielded if it has no dict 'content'
    @param item_id: id used to label the page requests
    """
    continuation_list = [None]
    # The lambda looks up `continuation_list` at call time, so it always passes
    # whichever list the name is bound to when it is invoked (it is rebound below)
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: entries live under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh out-parameter; only extract_entries fills it, the other
            # handlers leave it empty and the fallback below is used instead
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended via 'appendContinuationItemsAction'.
        # Each value is (handler, key under which the handler expects the items)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The handler is chosen from the keys of the first continuation item only
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the whole item list so it looks like the renderer the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            yield from known_renderers[key][0](video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
9558dcec 4147
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose 'selected' is True.

    If there is none, raise ExtractorError when `fatal`, else return None.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if not fatal:
        return None
    raise ExtractorError('Unable to find selected tab')
b82f815f 4157
def _extract_uploader(self, data):
    """
    Return the uploader fields found in the secondary playlist sidebar.

    The result holds whichever of 'uploader', 'uploader_id' and 'uploader_url'
    are not None; it is empty when the sidebar lists no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # Texts of the form "by X and N others" are reduced to "X"
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 4173
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a tab-based page.

    Metadata is collected from the page's channel or playlist metadata
    renderer, its header and its primary sidebar. The entries come from the
    selected tab via `_entries`.

    @param item_id: used as the playlist id when the page provides no channel id
    @param data: the page's initial/API data
    @param tabs: list of tab renderers, one of which must be selected
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    # Channel metadata takes precedence; playlist metadata is only the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, in which case item_id is used below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # The sidebar stats are read positionally below:
    # 0 -> playlist_count, 1 -> view_count, 2 -> last-updated time text
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Suffix the title with the selected tab's title / expanded text when present
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without channel metadata, fall back to the owner listed in the sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 4265
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a playlist panel, requesting further pages as needed.

    After each page, the 'next' endpoint is queried starting from the last
    video seen. Extraction stops when a page has no videos, has no videos
    after the last one already yielded, or contains no playlist panel.

    @param playlist: the playlist panel renderer of the first page
    @param data: the page's initial data, used for account/visitor headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video already yielded, or from the
        # beginning when that video does not appear on this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may not carry a watch endpoint; fall back to the
        # defaults in `query` rather than failing on `None.get`
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # A response without a playlist panel ends the playlist; iterating
        # `None` on the next round would raise a TypeError instead
        if not playlist:
            return
cd7c66cf 4297
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist panel found on a watch page.

    If the panel links to a playlist page other than `url`, that URL is
    delegated to YoutubeTabIE; otherwise the panel is extracted inline.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4320
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar.
    Note: a playlist is only treated as public when YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4352
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    Values that are not of `expected_type` are ignored; None is returned
    when no item provides one.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4361
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the sidebar's 'show unavailable
    videos' menu item when it exists, otherwise defaults are used.
    Returns None if the page has no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4397
@property
def skip_webpage(self):
    """Whether 'webpage' is listed in the youtubetab 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4401
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying when it is unusable.

    Up to 'extractor_retries' (default 3) retries are made when the download
    fails with a network error (other than HTTP 403/429) or when the initial
    data lacks all of the expected top-level keys.

    @param fatal: if true, errors are raised; otherwise they are reported as
                  warnings and whatever was obtained so far is returned
    @returns: (webpage, data)
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so the first pass through the loop is attempt 0 (not a retry)
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # Any one of these keys is taken to mean the data is complete
            if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
4447
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is unauthenticated.
    Otherwise raises ExtractorError if `fatal` and the 'authcheck' skip
    argument is not set, else reports a one-time warning.
    """
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if not authcheck_skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4459
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the page data for `url`, from the webpage or else from the API.

    Unless the webpage is skipped, it is downloaded first and also supplies
    `ytcfg` when none was given. When that yields no data, the tab endpoint
    is queried instead.

    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4479
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the response of the endpoint it maps to.

    First calls `navigation/resolve_url`; then, for the first of
    `browseEndpoint` / `watchEndpoint` (in that order) present as a
    non-empty dict in the resolved endpoint, queries the `browse` / `next`
    API respectively and returns that response.

    If neither endpoint is found: raises an expected ExtractorError when
    `fatal` is set, otherwise reports a warning and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key inside the resolved endpoint, API endpoint to query with its params)
    endpoint_map = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_ep in endpoint_map:
        endpoint_params = try_get(resolved, lambda r: r['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    failure_msg = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(failure_msg, expected=True)
    self.report_warning(failure_msg, item_id)
4497
a6213a49 4498 _SEARCH_PARAMS = None
4499
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Generate entries for a search `query`, following continuations page by page.

    `params` is sent as the `params` field of the search request when
    truthy; if left at NO_DEFAULT, `self._SEARCH_PARAMS` is used instead.
    Pages are requested from the `search` API endpoint until a page
    leaves no continuation behind.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    # Alternative locations of the result list within a search response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Only the (de-duplicated) top-level keys are used to validate a response
    check_get_keys = tuple(set(path[0] for path in content_keys))

    display_id = f'query "{query}"'
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list: _extract_entries is handed it and the slot is read back afterwards
    continuation_list = [None]
    response = None
    page_num = 0
    while True:
        page_num += 1
        payload.update(continuation_list[0] or {})
        # Visitor data is taken from the previous page's response (None on the first page)
        visitor_data = self._extract_visitor_data(response)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4532
4533
4534class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4535 IE_DESC = 'YouTube Tabs'
4536 _VALID_URL = r'''(?x:
4537 https?://
4538 (?:\w+\.)?
4539 (?:
4540 youtube(?:kids)?\.com|
4541 %(invidious)s
4542 )/
4543 (?:
4544 (?P<channel_type>channel|c|user|browse)/|
4545 (?P<not_channel>
4546 feed/|hashtag/|
4547 (?:playlist|watch)\?.*?\blist=
4548 )|
4549 (?!(?:%(reserved_names)s)\b) # Direct URLs
4550 )
4551 (?P<id>[^/?\#&]+)
4552 )''' % {
4553 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4554 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4555 }
4556 IE_NAME = 'youtube:tab'
4557
4558 _TESTS = [{
4559 'note': 'playlists, multipage',
4560 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4561 'playlist_mincount': 94,
4562 'info_dict': {
4563 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4564 'title': 'Igor Kleiner - Playlists',
a6213a49 4565 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4566 'uploader': 'Igor Kleiner',
a6213a49 4567 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4568 'channel': 'Igor Kleiner',
4569 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4570 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4571 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4572 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4573 'channel_follower_count': int
a6213a49 4574 },
4575 }, {
4576 'note': 'playlists, multipage, different order',
4577 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4578 'playlist_mincount': 94,
4579 'info_dict': {
4580 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4581 'title': 'Igor Kleiner - Playlists',
a6213a49 4582 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4583 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4584 'uploader': 'Igor Kleiner',
4585 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4586 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4587 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4588 'channel': 'Igor Kleiner',
4589 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4590 'channel_follower_count': int
a6213a49 4591 },
4592 }, {
4593 'note': 'playlists, series',
4594 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4595 'playlist_mincount': 5,
4596 'info_dict': {
4597 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4598 'title': '3Blue1Brown - Playlists',
4599 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4600 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4601 'uploader': '3Blue1Brown',
976ae3ea 4602 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4603 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4604 'channel': '3Blue1Brown',
4605 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4606 'tags': ['Mathematics'],
6c73052c 4607 'channel_follower_count': int
a6213a49 4608 },
4609 }, {
4610 'note': 'playlists, singlepage',
4611 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4612 'playlist_mincount': 4,
4613 'info_dict': {
4614 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4615 'title': 'ThirstForScience - Playlists',
4616 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4617 'uploader': 'ThirstForScience',
4618 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4619 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4620 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4621 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4622 'tags': 'count:13',
4623 'channel': 'ThirstForScience',
6c73052c 4624 'channel_follower_count': int
a6213a49 4625 }
4626 }, {
4627 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4628 'only_matching': True,
4629 }, {
4630 'note': 'basic, single video playlist',
4631 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4632 'info_dict': {
4633 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4634 'uploader': 'Sergey M.',
4635 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4636 'title': 'youtube-dl public playlist',
976ae3ea 4637 'description': '',
4638 'tags': [],
4639 'view_count': int,
4640 'modified_date': '20201130',
4641 'channel': 'Sergey M.',
4642 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4643 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4644 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4645 },
4646 'playlist_count': 1,
4647 }, {
4648 'note': 'empty playlist',
4649 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4650 'info_dict': {
4651 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4652 'uploader': 'Sergey M.',
4653 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4654 'title': 'youtube-dl empty playlist',
976ae3ea 4655 'tags': [],
4656 'channel': 'Sergey M.',
4657 'description': '',
4658 'modified_date': '20160902',
4659 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4660 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4661 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4662 },
4663 'playlist_count': 0,
4664 }, {
4665 'note': 'Home tab',
4666 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4667 'info_dict': {
4668 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4669 'title': 'lex will - Home',
4670 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4671 'uploader': 'lex will',
4672 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4673 'channel': 'lex will',
4674 'tags': ['bible', 'history', 'prophesy'],
4675 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4676 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4677 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4678 'channel_follower_count': int
a6213a49 4679 },
4680 'playlist_mincount': 2,
4681 }, {
4682 'note': 'Videos tab',
4683 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4684 'info_dict': {
4685 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4686 'title': 'lex will - Videos',
4687 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4688 'uploader': 'lex will',
4689 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4690 'tags': ['bible', 'history', 'prophesy'],
4691 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4692 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4693 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4694 'channel': 'lex will',
6c73052c 4695 'channel_follower_count': int
a6213a49 4696 },
4697 'playlist_mincount': 975,
4698 }, {
4699 'note': 'Videos tab, sorted by popular',
4700 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4701 'info_dict': {
4702 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4703 'title': 'lex will - Videos',
4704 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4705 'uploader': 'lex will',
4706 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4707 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4708 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4709 'channel': 'lex will',
4710 'tags': ['bible', 'history', 'prophesy'],
4711 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4712 'channel_follower_count': int
a6213a49 4713 },
4714 'playlist_mincount': 199,
4715 }, {
4716 'note': 'Playlists tab',
4717 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4718 'info_dict': {
4719 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4720 'title': 'lex will - Playlists',
4721 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4722 'uploader': 'lex will',
4723 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4724 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4725 'channel': 'lex will',
4726 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4727 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4728 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4729 'channel_follower_count': int
a6213a49 4730 },
4731 'playlist_mincount': 17,
4732 }, {
4733 'note': 'Community tab',
4734 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4735 'info_dict': {
4736 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4737 'title': 'lex will - Community',
4738 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4739 'uploader': 'lex will',
4740 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4741 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4742 'channel': 'lex will',
4743 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4744 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4745 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4746 'channel_follower_count': int
a6213a49 4747 },
4748 'playlist_mincount': 18,
4749 }, {
4750 'note': 'Channels tab',
4751 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4752 'info_dict': {
4753 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4754 'title': 'lex will - Channels',
4755 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4756 'uploader': 'lex will',
4757 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4758 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4759 'channel': 'lex will',
4760 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4761 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4762 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4763 'channel_follower_count': int
a6213a49 4764 },
4765 'playlist_mincount': 12,
4766 }, {
4767 'note': 'Search tab',
4768 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4769 'playlist_mincount': 40,
4770 'info_dict': {
4771 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4772 'title': '3Blue1Brown - Search - linear algebra',
4773 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4774 'uploader': '3Blue1Brown',
4775 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4776 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4777 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4778 'tags': ['Mathematics'],
4779 'channel': '3Blue1Brown',
4780 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4781 'channel_follower_count': int
a6213a49 4782 },
4783 }, {
4784 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4785 'only_matching': True,
4786 }, {
4787 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4788 'only_matching': True,
4789 }, {
4790 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4791 'only_matching': True,
4792 }, {
4793 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4794 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4795 'info_dict': {
4796 'title': '29C3: Not my department',
4797 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4798 'uploader': 'Christiaan008',
4799 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4800 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4801 'tags': [],
4802 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4803 'view_count': int,
4804 'modified_date': '20150605',
4805 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4806 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4807 'channel': 'Christiaan008',
a6213a49 4808 },
4809 'playlist_count': 96,
4810 }, {
4811 'note': 'Large playlist',
4812 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4813 'info_dict': {
4814 'title': 'Uploads from Cauchemar',
4815 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4816 'uploader': 'Cauchemar',
4817 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4818 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4819 'tags': [],
4820 'modified_date': r're:\d{8}',
4821 'channel': 'Cauchemar',
4822 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4823 'view_count': int,
4824 'description': '',
4825 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4826 },
4827 'playlist_mincount': 1123,
976ae3ea 4828 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4829 }, {
4830 'note': 'even larger playlist, 8832 videos',
4831 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4832 'only_matching': True,
4833 }, {
4834 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4835 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4836 'info_dict': {
4837 'title': 'Uploads from Interstellar Movie',
4838 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4839 'uploader': 'Interstellar Movie',
4840 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4841 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4842 'tags': [],
4843 'view_count': int,
4844 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4845 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4846 'channel': 'Interstellar Movie',
4847 'description': '',
4848 'modified_date': r're:\d{8}',
a6213a49 4849 },
4850 'playlist_mincount': 21,
4851 }, {
4852 'note': 'Playlist with "show unavailable videos" button',
4853 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4854 'info_dict': {
4855 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4856 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4857 'uploader': 'Phim Siêu Nhân Nhật Bản',
4858 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4859 'view_count': int,
4860 'channel': 'Phim Siêu Nhân Nhật Bản',
4861 'tags': [],
4862 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4863 'description': '',
4864 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4865 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4866 'modified_date': r're:\d{8}',
a6213a49 4867 },
4868 'playlist_mincount': 200,
976ae3ea 4869 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4870 }, {
4871 'note': 'Playlist with unavailable videos in page 7',
4872 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4873 'info_dict': {
4874 'title': 'Uploads from BlankTV',
4875 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4876 'uploader': 'BlankTV',
4877 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 4878 'channel': 'BlankTV',
4879 'channel_url': 'https://www.youtube.com/c/blanktv',
4880 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4881 'view_count': int,
4882 'tags': [],
4883 'uploader_url': 'https://www.youtube.com/c/blanktv',
4884 'modified_date': r're:\d{8}',
4885 'description': '',
a6213a49 4886 },
4887 'playlist_mincount': 1000,
976ae3ea 4888 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4889 }, {
4890 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4891 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4892 'info_dict': {
4893 'title': 'Data Analysis with Dr Mike Pound',
4894 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4895 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4896 'uploader': 'Computerphile',
4897 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 4898 'uploader_url': 'https://www.youtube.com/user/Computerphile',
4899 'tags': [],
4900 'view_count': int,
4901 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4902 'channel_url': 'https://www.youtube.com/user/Computerphile',
4903 'channel': 'Computerphile',
a6213a49 4904 },
4905 'playlist_mincount': 11,
4906 }, {
4907 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4908 'only_matching': True,
4909 }, {
4910 'note': 'Playlist URL that does not actually serve a playlist',
4911 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4912 'info_dict': {
4913 'id': 'FqZTN594JQw',
4914 'ext': 'webm',
4915 'title': "Smiley's People 01 detective, Adventure Series, Action",
4916 'uploader': 'STREEM',
4917 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4918 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4919 'upload_date': '20150526',
4920 'license': 'Standard YouTube License',
4921 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4922 'categories': ['People & Blogs'],
4923 'tags': list,
4924 'view_count': int,
4925 'like_count': int,
a6213a49 4926 },
4927 'params': {
4928 'skip_download': True,
4929 },
4930 'skip': 'This video is not available.',
4931 'add_ie': [YoutubeIE.ie_key()],
4932 }, {
4933 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4934 'only_matching': True,
4935 }, {
4936 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4937 'only_matching': True,
4938 }, {
4939 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4940 'info_dict': {
6c73052c 4941 'id': 'GgL890LIznQ', # This will keep changing
a6213a49 4942 'ext': 'mp4',
976ae3ea 4943 'title': str,
a6213a49 4944 'uploader': 'Sky News',
4945 'uploader_id': 'skynews',
4946 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4947 'upload_date': r're:\d{8}',
976ae3ea 4948 'description': str,
a6213a49 4949 'categories': ['News & Politics'],
4950 'tags': list,
4951 'like_count': int,
6c73052c 4952 'release_timestamp': 1642502819,
976ae3ea 4953 'channel': 'Sky News',
4954 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4955 'age_limit': 0,
4956 'view_count': int,
6c73052c 4957 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 4958 'playable_in_embed': True,
6c73052c 4959 'release_date': '20220118',
976ae3ea 4960 'availability': 'public',
4961 'live_status': 'is_live',
4962 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 4963 'channel_follower_count': int
a6213a49 4964 },
4965 'params': {
4966 'skip_download': True,
4967 },
976ae3ea 4968 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 4969 }, {
4970 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4971 'info_dict': {
4972 'id': 'a48o2S1cPoo',
4973 'ext': 'mp4',
4974 'title': 'The Young Turks - Live Main Show',
4975 'uploader': 'The Young Turks',
4976 'uploader_id': 'TheYoungTurks',
4977 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4978 'upload_date': '20150715',
4979 'license': 'Standard YouTube License',
4980 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4981 'categories': ['News & Politics'],
4982 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4983 'like_count': int,
a6213a49 4984 },
4985 'params': {
4986 'skip_download': True,
4987 },
4988 'only_matching': True,
4989 }, {
4990 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4991 'only_matching': True,
4992 }, {
4993 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4994 'only_matching': True,
4995 }, {
4996 'note': 'A channel that is not live. Should raise error',
4997 'url': 'https://www.youtube.com/user/numberphile/live',
4998 'only_matching': True,
4999 }, {
5000 'url': 'https://www.youtube.com/feed/trending',
5001 'only_matching': True,
5002 }, {
5003 'url': 'https://www.youtube.com/feed/library',
5004 'only_matching': True,
5005 }, {
5006 'url': 'https://www.youtube.com/feed/history',
5007 'only_matching': True,
5008 }, {
5009 'url': 'https://www.youtube.com/feed/subscriptions',
5010 'only_matching': True,
5011 }, {
5012 'url': 'https://www.youtube.com/feed/watch_later',
5013 'only_matching': True,
5014 }, {
5015 'note': 'Recommended - redirects to home page.',
5016 'url': 'https://www.youtube.com/feed/recommended',
5017 'only_matching': True,
5018 }, {
5019 'note': 'inline playlist with not always working continuations',
5020 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5021 'only_matching': True,
5022 }, {
5023 'url': 'https://www.youtube.com/course',
5024 'only_matching': True,
5025 }, {
5026 'url': 'https://www.youtube.com/zsecurity',
5027 'only_matching': True,
5028 }, {
5029 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5030 'only_matching': True,
5031 }, {
5032 'url': 'https://www.youtube.com/TheYoungTurks/live',
5033 'only_matching': True,
5034 }, {
5035 'url': 'https://www.youtube.com/hashtag/cctv9',
5036 'info_dict': {
5037 'id': 'cctv9',
5038 'title': '#cctv9',
976ae3ea 5039 'tags': [],
a6213a49 5040 },
5041 'playlist_mincount': 350,
5042 }, {
5043 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5044 'only_matching': True,
5045 }, {
5046 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5047 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5048 'only_matching': True
5049 }, {
5050 'note': '/browse/ should redirect to /channel/',
5051 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5052 'only_matching': True
5053 }, {
5054 'note': 'VLPL, should redirect to playlist?list=PL...',
5055 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5056 'info_dict': {
5057 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5058 'uploader': 'NoCopyrightSounds',
5059 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5060 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5061 'title': 'NCS Releases',
976ae3ea 5062 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5063 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5064 'modified_date': r're:\d{8}',
5065 'view_count': int,
5066 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5067 'tags': [],
5068 'channel': 'NoCopyrightSounds',
a6213a49 5069 },
5070 'playlist_mincount': 166,
976ae3ea 5071 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5072 }, {
5073 'note': 'Topic, should redirect to playlist?list=UU...',
5074 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5075 'info_dict': {
5076 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5077 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5078 'title': 'Uploads from Royalty Free Music - Topic',
5079 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5080 'tags': [],
5081 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5082 'channel': 'Royalty Free Music - Topic',
5083 'view_count': int,
5084 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5085 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5086 'modified_date': r're:\d{8}',
5087 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5088 'description': '',
a6213a49 5089 },
5090 'expected_warnings': [
a6213a49 5091 'The URL does not have a videos tab',
976ae3ea 5092 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5093 ],
5094 'playlist_mincount': 101,
5095 }, {
5096 'note': 'Topic without a UU playlist',
5097 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5098 'info_dict': {
5099 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5100 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5101 'tags': [],
a6213a49 5102 },
5103 'expected_warnings': [
976ae3ea 5104 'the playlist redirect gave error',
a6213a49 5105 ],
5106 'playlist_mincount': 9,
5107 }, {
5108 'note': 'Youtube music Album',
5109 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5110 'info_dict': {
5111 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5112 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5113 'tags': [],
5114 'view_count': int,
5115 'description': '',
5116 'availability': 'unlisted',
5117 'modified_date': r're:\d{8}',
a6213a49 5118 },
5119 'playlist_count': 50,
5120 }, {
5121 'note': 'unlisted single video playlist',
5122 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5123 'info_dict': {
5124 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5125 'uploader': 'colethedj',
5126 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5127 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5128 'availability': 'unlisted',
5129 'tags': [],
5130 'modified_date': '20211208',
5131 'channel': 'colethedj',
5132 'view_count': int,
5133 'description': '',
5134 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5135 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5136 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5137 },
5138 'playlist_count': 1,
5139 }, {
5140 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5141 'url': 'https://www.youtube.com/feed/recommended',
5142 'info_dict': {
5143 'id': 'recommended',
5144 'title': 'recommended',
6c73052c 5145 'tags': [],
a6213a49 5146 },
5147 'playlist_mincount': 50,
5148 'params': {
5149 'skip_download': True,
5150 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5151 },
5152 }, {
5153 'note': 'API Fallback: /videos tab, sorted by oldest first',
5154 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5155 'info_dict': {
5156 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5157 'title': 'Cody\'sLab - Videos',
5158 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5159 'uploader': 'Cody\'sLab',
5160 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5161 'channel': 'Cody\'sLab',
5162 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5163 'tags': [],
5164 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5165 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5166 'channel_follower_count': int
a6213a49 5167 },
5168 'playlist_mincount': 650,
5169 'params': {
5170 'skip_download': True,
5171 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5172 },
5173 }, {
5174 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5175 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5176 'info_dict': {
5177 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5178 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5179 'title': 'Uploads from Royalty Free Music - Topic',
5180 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5181 'modified_date': r're:\d{8}',
5182 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5183 'description': '',
5184 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5185 'tags': [],
5186 'channel': 'Royalty Free Music - Topic',
5187 'view_count': int,
5188 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5189 },
5190 'expected_warnings': [
976ae3ea 5191 'does not have a videos tab',
5192 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5193 ],
5194 'playlist_mincount': 101,
5195 'params': {
5196 'skip_download': True,
5197 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5198 },
7c219ea6 5199 }, {
5200 'note': 'non-standard redirect to regional channel',
5201 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5202 'only_matching': True
61d3665d 5203 }, {
5204 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5205 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5206 'info_dict': {
5207 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5208 'modified_date': '20220407',
5209 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5210 'tags': [],
5211 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5212 'uploader': 'pukkandan',
5213 'availability': 'unlisted',
5214 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5215 'channel': 'pukkandan',
5216 'description': 'Test for collaborative playlist',
5217 'title': 'yt-dlp test - collaborative playlist',
5218 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5219 },
5220 'playlist_mincount': 2
a6213a49 5221 }]
5222
5223 @classmethod
5224 def suitable(cls, url):
86e5f3ed 5225 return False if YoutubeIE.suitable(url) else super().suitable(url)
9297939e 5226
    # Splits a URL into `pre` (the part matched by _VALID_URL), an optional
    # `/tab` path segment and the remaining `post` text. The conditional group
    # only tries to capture `tab` when the `not_channel` group did not match.
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5228
    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """Extract a tab, playlist or (as a fallback) single-video result from *url*.

        The host is first normalised to www.youtube.com and the URL is split
        into `pre`/`tab`/`post` parts via `_URL_RE`. Depending on the smuggled
        data, compat options and the downloaded page data, the method returns
        a `url_result` redirect or delegates to `_extract_from_tabs` /
        `_extract_from_playlist`.

        Raises ExtractorError when an album cannot be resolved, when the
        requested tab does not exist, when a /live tab is not live, or when
        the page cannot be recognised.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match against _URL_RE and replace unmatched (None) groups with ''
            # so the parts can be lower-cased, sliced and joined safely
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        # Remember what the user asked for before a default tab is substituted;
        # it decides below whether a missing tab is a warning or an error
        original_tab_name = tab
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        # YouTube may provide a non-standard redirect to the regional channel
        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
        redirect_url = traverse_obj(
            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
            # Keep the requested tab and trailing part when following the redirect
            redirect_url = ''.join((
                urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
            self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
            return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_name = selected_tab.get('title', '').lower()
            # The tab titled "home" is addressed as "/featured" in URLs
            if selected_tab_name == 'home':
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
            if 'no-youtube-channel-redirect' not in compat_opts:
                if requested_tab_name == 'live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
                        # The tab was substituted above (not typed by the user),
                        # so fall back gracefully instead of failing
                        if item_id[:2] == 'UC':
                            # Topic channels don't have /videos. Use the equivalent playlist instead
                            pl_id = f'UU{item_id[2:]}'
                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                            try:
                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                            except ExtractorError:
                                redirect_warning += ' and the playlist redirect gave error'
                            else:
                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                        if selected_tab_name and selected_tab_name != requested_tab_name:
                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                    else:
                        # The user explicitly requested a tab that does not exist
                        raise ExtractorError(redirect_warning, expected=True)

        if redirect_warning:
            self.to_screen(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above, so look the tabs up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Last resort: hand a single video over to YoutubeIE
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5358
c5e8d7af 5359
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist IDs and `list=` URLs and forwards them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v=` video ID."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): this function-local import duplicates the module-level
        # one; presumably deliberate so the method stays self-contained —
        # confirm before removing
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* into a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one (a bare ID has none)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5469
5470
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links carrying a `list=` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and pass it to YoutubeTabIE."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5518
5519
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `embed/live_stream?channel=...` URLs onto the channel's /live tab."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5533
5534
class YoutubeYtUserIE(InfoExtractor):
    """Handles the `ytuser:<name>` shorthand by delegating to the user's /videos page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5549
b05654f0 5550
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Handles the `:ytfav` keyword family by delegating to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
5568
5569
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Builds a playlist out of the entries of the account's notification menu."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        ``continuation_list`` is a one-element list used as an output slot:
        its first item is reset to None and then set to the continuation
        renderer if the response contains one.
        """
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert a notification renderer into a `url` entry dict.

        Returns None when the notification points neither to a video nor to a
        channel post whose channel ID and post ID can both be determined.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            # `canonicalBaseUrl` may be missing; search an empty string rather than None
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str) or '',
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        # `channel` is None when the menu entry is absent; re.escape(None) would
        # raise TypeError, so fall back to an empty prefix in that case
        title = self._search_regex(
            rf'{re.escape(channel or "")} [^:]+: (.+)', self._get_text(notification, 'shortMessage') or '',
            'video title', default=None)
        if title:
            title = title.replace('\xad', '')  # remove soft hyphens
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until no continuation is reported."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the notification menu."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
5657
5658
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearch` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
b05654f0 5672
a61fd4cf 5673
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearchdate` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
75dff0ee 5687
c9ae7b95 5688
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extracts results for youtube.com `/results` and `/search` URLs."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of results; the query doubles as playlist ID and title."""
        qs = parse_qs(url)
        # _VALID_URL requires one of the two query parameters to be present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5727
5728
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extracts results for music.youtube.com `/search` URLs."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as usable in the URL fragment) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of results titled `<query>` or `<query> - <section>`.

        An explicit `sp` parameter takes precedence over the URL fragment. An
        `sp` value not listed in `_SECTIONS` is used verbatim as the section
        label; an unknown fragment is ignored.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup: label the playlist with the section name if known
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # Text between the first and the second '#', if any
            fragment = url.split('#')[1] if '#' in url else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5780
5781
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override _FEED_NAME, which determines both
    the extractor name and the /feed/ URL that is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5800
5801
class YoutubeWatchLaterIE(InfoExtractor):
    """Handles the `:ytwatchlater` keyword by delegating to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5814
5815
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; also matches the bare site URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 5831
1ed5b5c9 5832
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 5844
1ed5b5c9 5845
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
5854
5855
class YoutubeStoriesIE(InfoExtractor):
    """Handles the `ytstories:<channel id>` shorthand via the channel's `RLTD` playlist."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist derived from the channel ID.

        The playlist ID is `RLTD` followed by the channel ID without its
        leading `UC` (the `id` group of _VALID_URL excludes that prefix).
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key string, consistent with the sibling extractors
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5870
5871
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that lost their video ID and reports a helpful error."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this project's executable (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
5919
5920
class YoutubeClipIE(InfoExtractor):
    """Placeholder for /clip/ URLs: warns and hands the URL to the Generic extractor."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported, then delegate the unchanged URL to 'Generic'."""
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
5929
5930
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose `v=` value has only 1 to 10 characters and reports an error."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)