]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[dash] Show fragment count with `--live-from-start` (#3493)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
1890fc63 18from ..compat import functools # isort: split
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
f8271158 31 NO_DEFAULT,
32 ExtractorError,
720c3099 33 bug_reports_message,
82d02080 34 classproperty,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
2d30521a 39 float_or_none,
11f9be09 40 format_field,
ff91cf74 41 get_first,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
48416bc4 45 js_to_json,
94278f72 46 mimetype2ext,
9c0d7f49 47 network_exceptions,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
f0d785d3 65 unified_timestamp,
cf7e015f 66 unsmuggle_url,
8bdd16b4 67 update_url_query,
21c340b8 68 url_or_none,
fe93e2c4 69 urljoin,
7c365c21 70 variadic,
c5e8d7af
PH
71)
72
000c15a4 73# any clients starting with _ cannot be explicitly requested by the user
74INNERTUBE_CLIENTS = {
75 'web': {
76 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
77 'INNERTUBE_CONTEXT': {
78 'client': {
79 'clientName': 'WEB',
18c7683d 80 'clientVersion': '2.20211221.00.00',
000c15a4 81 }
82 },
83 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
84 },
85 'web_embedded': {
86 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
87 'INNERTUBE_CONTEXT': {
88 'client': {
89 'clientName': 'WEB_EMBEDDED_PLAYER',
18c7683d 90 'clientVersion': '1.20211215.00.01',
000c15a4 91 },
92 },
93 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
94 },
95 'web_music': {
96 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
97 'INNERTUBE_HOST': 'music.youtube.com',
98 'INNERTUBE_CONTEXT': {
99 'client': {
100 'clientName': 'WEB_REMIX',
18c7683d 101 'clientVersion': '1.20211213.00.00',
000c15a4 102 }
103 },
104 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
105 },
e7e94f2a 106 'web_creator': {
18c7683d 107 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
108 'INNERTUBE_CONTEXT': {
109 'client': {
110 'clientName': 'WEB_CREATOR',
18c7683d 111 'clientVersion': '1.20211220.02.00',
e7e94f2a
D
112 }
113 },
114 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
115 },
000c15a4 116 'android': {
18c7683d 117 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 118 'INNERTUBE_CONTEXT': {
119 'client': {
120 'clientName': 'ANDROID',
18c7683d 121 'clientVersion': '16.49',
000c15a4 122 }
123 },
124 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 125 'REQUIRE_JS_PLAYER': False
000c15a4 126 },
127 'android_embedded': {
18c7683d 128 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 129 'INNERTUBE_CONTEXT': {
130 'client': {
131 'clientName': 'ANDROID_EMBEDDED_PLAYER',
18c7683d 132 'clientVersion': '16.49',
000c15a4 133 },
134 },
b6de707d 135 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
136 'REQUIRE_JS_PLAYER': False
000c15a4 137 },
138 'android_music': {
18c7683d 139 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 140 'INNERTUBE_CONTEXT': {
141 'client': {
142 'clientName': 'ANDROID_MUSIC',
18c7683d 143 'clientVersion': '4.57',
000c15a4 144 }
145 },
146 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 147 'REQUIRE_JS_PLAYER': False
000c15a4 148 },
e7e94f2a 149 'android_creator': {
18c7683d 150 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
151 'INNERTUBE_CONTEXT': {
152 'client': {
153 'clientName': 'ANDROID_CREATOR',
18c7683d 154 'clientVersion': '21.47',
e7e94f2a
D
155 },
156 },
b6de707d 157 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
158 'REQUIRE_JS_PLAYER': False
e7e94f2a 159 },
18c7683d 160 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
161 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 162 'ios': {
18c7683d 163 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 164 'INNERTUBE_CONTEXT': {
165 'client': {
166 'clientName': 'IOS',
18c7683d 167 'clientVersion': '16.46',
168 'deviceModel': 'iPhone14,3',
000c15a4 169 }
170 },
b6de707d 171 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
172 'REQUIRE_JS_PLAYER': False
000c15a4 173 },
174 'ios_embedded': {
000c15a4 175 'INNERTUBE_CONTEXT': {
176 'client': {
177 'clientName': 'IOS_MESSAGES_EXTENSION',
18c7683d 178 'clientVersion': '16.46',
179 'deviceModel': 'iPhone14,3',
000c15a4 180 },
181 },
b6de707d 182 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
183 'REQUIRE_JS_PLAYER': False
000c15a4 184 },
185 'ios_music': {
18c7683d 186 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 187 'INNERTUBE_CONTEXT': {
188 'client': {
189 'clientName': 'IOS_MUSIC',
18c7683d 190 'clientVersion': '4.57',
000c15a4 191 },
192 },
b6de707d 193 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
194 'REQUIRE_JS_PLAYER': False
000c15a4 195 },
e7e94f2a
D
196 'ios_creator': {
197 'INNERTUBE_CONTEXT': {
198 'client': {
199 'clientName': 'IOS_CREATOR',
18c7683d 200 'clientVersion': '21.47',
e7e94f2a
D
201 },
202 },
b6de707d 203 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
204 'REQUIRE_JS_PLAYER': False
e7e94f2a 205 },
3619f78d 206 # mweb has 'ultralow' formats
207 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 208 'mweb': {
18c7683d 209 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 210 'INNERTUBE_CONTEXT': {
211 'client': {
212 'clientName': 'MWEB',
18c7683d 213 'clientVersion': '2.20211221.01.00',
000c15a4 214 }
215 },
216 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
e7870111
D
217 },
218 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
219 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
220 'tv_embedded': {
221 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
222 'INNERTUBE_CONTEXT': {
223 'client': {
224 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
225 'clientVersion': '2.0',
226 },
227 },
228 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
229 },
000c15a4 230}
231
232
e7870111
D
233def _split_innertube_client(client_name):
234 variant, *base = client_name.rsplit('.', 1)
235 if base:
236 return variant, base[0], variant
237 base, *variant = client_name.split('_', 1)
238 return client_name, base, variant[0] if variant else None
239
240
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Each config gets a default API key, host, ``REQUIRE_JS_PLAYER`` flag and
    ``hl`` language, plus a ``priority`` derived from the position of its base
    client in the base-client list. Clients without a variant additionally
    spawn a ``<name>_embedscreen`` copy; ``embedded`` variants receive the
    third-party embed context.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(tuple(reversed(base_clients)))

    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "_embedscreen" entries are inserted while looping
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
267
268
269build_innertube_clients()
270
271
de7f3446 272class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 273 """Provide base functions for Youtube extractors"""
e00eb564 274
3462ffa8 275 _RESERVED_NAMES = (
3cd786db 276 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 277 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
3619f78d 278 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 279 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 280
3619f78d 281 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
282
52efa4b3 283 # _NETRC_MACHINE = 'youtube'
3619f78d 284
b2e8bc1b
JMF
285 # If True it will raise an error if no login info is provided
286 _LOGIN_REQUIRED = False
287
d9190e44
RH
288 _INVIDIOUS_SITES = (
289 # invidious-redirect websites
290 r'(?:www\.)?redirect\.invidious\.io',
291 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 292 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
293 r'(?:www\.)?invidious\.pussthecat\.org',
294 r'(?:www\.)?invidious\.zee\.li',
295 r'(?:www\.)?invidious\.ethibox\.fr',
296 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
297 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
298 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
299 # youtube-dl invidious instances list
300 r'(?:(?:www|no)\.)?invidiou\.sh',
301 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
302 r'(?:www\.)?invidious\.kabi\.tk',
303 r'(?:www\.)?invidious\.mastodon\.host',
304 r'(?:www\.)?invidious\.zapashcanon\.fr',
305 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
306 r'(?:www\.)?invidious\.tinfoil-hat\.net',
307 r'(?:www\.)?invidious\.himiko\.cloud',
308 r'(?:www\.)?invidious\.reallyancient\.tech',
309 r'(?:www\.)?invidious\.tube',
310 r'(?:www\.)?invidiou\.site',
311 r'(?:www\.)?invidious\.site',
312 r'(?:www\.)?invidious\.xyz',
313 r'(?:www\.)?invidious\.nixnet\.xyz',
314 r'(?:www\.)?invidious\.048596\.xyz',
315 r'(?:www\.)?invidious\.drycat\.fr',
316 r'(?:www\.)?inv\.skyn3t\.in',
317 r'(?:www\.)?tube\.poal\.co',
318 r'(?:www\.)?tube\.connect\.cafe',
319 r'(?:www\.)?vid\.wxzm\.sx',
320 r'(?:www\.)?vid\.mint\.lgbt',
321 r'(?:www\.)?vid\.puffyan\.us',
322 r'(?:www\.)?yewtu\.be',
323 r'(?:www\.)?yt\.elukerio\.org',
324 r'(?:www\.)?yt\.lelux\.fi',
325 r'(?:www\.)?invidious\.ggc-project\.de',
326 r'(?:www\.)?yt\.maisputain\.ovh',
327 r'(?:www\.)?ytprivate\.com',
328 r'(?:www\.)?invidious\.13ad\.de',
329 r'(?:www\.)?invidious\.toot\.koeln',
330 r'(?:www\.)?invidious\.fdn\.fr',
331 r'(?:www\.)?watch\.nettohikari\.com',
332 r'(?:www\.)?invidious\.namazso\.eu',
333 r'(?:www\.)?invidious\.silkky\.cloud',
334 r'(?:www\.)?invidious\.exonip\.de',
335 r'(?:www\.)?invidious\.riverside\.rocks',
336 r'(?:www\.)?invidious\.blamefran\.net',
337 r'(?:www\.)?invidious\.moomoo\.de',
338 r'(?:www\.)?ytb\.trom\.tf',
339 r'(?:www\.)?yt\.cyberhost\.uk',
340 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
341 r'(?:www\.)?qklhadlycap4cnod\.onion',
342 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
343 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
344 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
345 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
346 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
347 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
348 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
349 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
350 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
351 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
352 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
353 r'(?:www\.)?piped\.kavin\.rocks',
354 r'(?:www\.)?piped\.silkky\.cloud',
355 r'(?:www\.)?piped\.tokhmi\.xyz',
356 r'(?:www\.)?piped\.moomoo\.me',
357 r'(?:www\.)?il\.ax',
358 r'(?:www\.)?piped\.syncpundit\.com',
359 r'(?:www\.)?piped\.mha\.fi',
360 r'(?:www\.)?piped\.mint\.lgbt',
361 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
362 )
363
cce889b9 364 def _initialize_consent(self):
365 cookies = self._get_cookies('https://www.youtube.com/')
366 if cookies.get('__Secure-3PSID'):
367 return
368 consent_id = None
369 consent = cookies.get('CONSENT')
370 if consent:
371 if 'YES' in consent.value:
372 return
373 consent_id = self._search_regex(
374 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
375 if not consent_id:
376 consent_id = random.randint(100, 999)
377 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 378
def _initialize_pref(self):
    """Rewrite the PREF cookie so that ``hl=en`` and ``tz=UTC`` are set.

    Existing PREF values are kept when the cookie can be parsed; a parse
    failure only produces a warning.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if pref_cookie:
        try:
            pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref['hl'] = 'en'
    pref['tz'] = 'UTC'
    encoded = compat_urllib_parse_urlencode(pref)
    self._set_cookie('.youtube.com', name='PREF', value=encoded)
390
b2e8bc1b 391 def _real_initialize(self):
f3aa3c3f 392 self._initialize_pref()
cce889b9 393 self._initialize_consent()
a25bca9f 394 self._check_login_required()
395
396 def _check_login_required(self):
24146491 397 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 398 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 399
b7c47b74 400 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
401 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 402
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for ``client``."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 405
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for ``client``."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return builtin_cfg['INNERTUBE_HOST']
109dd3b2 408
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is used whenever the value found in ``ytcfg`` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
413
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg``, or from the default client config."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 418
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, or from the default client config."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 423
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default client config."""
    def _api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, _api_key, compat_str, default_client)
426
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the first dict ``INNERTUBE_CONTEXT`` found in ``ytcfg`` or the default config.

    The ``client`` sub-dict (when it is a dict) is updated in place so that
    language and timezone are fixed for extraction.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
434
cf87314d 435 _SAPISID = None
436
109dd3b2 437 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 438 time_now = round(time.time())
cf87314d 439 if self._SAPISID is None:
440 yt_cookies = self._get_cookies('https://www.youtube.com')
441 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
442 # See: https://github.com/yt-dlp/yt-dlp/issues/393
443 sapisid_cookie = dict_get(
444 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
445 if sapisid_cookie and sapisid_cookie.value:
446 self._SAPISID = sapisid_cookie.value
447 self.write_debug('Extracted SAPISID cookie')
448 # SAPISID cookie is required if not already present
449 if not yt_cookies.get('SAPISID'):
450 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
451 self._set_cookie(
452 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
453 else:
454 self._SAPISID = False
455 if not self._SAPISID:
456 return None
1974e99f 457 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
458 sapisidhash = hashlib.sha1(
86e5f3ed 459 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 460 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 463 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 464 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 465
109dd3b2 466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 467 data.update(query)
11f9be09 468 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
545cc85d 472 return self._download_json(
86e5f3ed 473 f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
a5c56234 474 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 475 data=json.dumps(data).encode('utf8'), headers=real_headers,
5dbc77df 476 query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
f4f751af 477
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON assigned to ytInitialData and return it."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 480
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` that converts to an int, else None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
491
492 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ``ID_TOKEN`` from ``ytcfg`` or, failing that, from ``webpage``."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
502
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 521
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
ac56cf38 531
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated (cached)."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False
535
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

    Returns an empty dict when there is no webpage, no match, or the JSON
    cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
543
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Explicit arguments take precedence over values extracted from ``ytcfg``.
    Headers whose value is None are omitted from the result. Authorization
    and X-Origin are added only when a SAPISIDHASH header can be generated.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to the first account when only a syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 568
a25bca9f 569 def _download_ytcfg(self, client, video_id):
570 url = {
571 'web': 'https://www.youtube.com',
572 'web_music': 'https://music.youtube.com',
573 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
574 }.get(client)
575 if not url:
576 return {}
577 webpage = self._download_webpage(
578 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
579 return self.extract_ytcfg(video_id, webpage) or {}
580
2d6659b9 581 @staticmethod
582 def _build_api_continuation_query(continuation, ctp=None):
583 query = {
584 'continuation': continuation
585 }
586 # TODO: Inconsistency with clickTrackingParams.
587 # Currently we have a fixed ctp contained within context (from ytcfg)
588 # and a ctp in root query for continuation.
589 if ctp:
590 query['clickTracking'] = {'clickTrackingParams': ctp}
591 return query
592
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData`` / ``reloadContinuationData``.

    Returns None when the renderer carries neither or has no continuation token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 605
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or has no
    ``continuationCommand`` token.
    """
    if not isinstance(continuation_ep, dict):
        return
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 615
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, if it has one.

    The renderer's own continuation data is tried first, then the
    ``continuationItemRenderer`` entries of its ``contents`` and ``items``.
    """
    own_continuation = cls._extract_next_continuation_data(renderer)
    if own_continuation:
        return own_continuation

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
636
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every usable alert in ``data['alerts']``.

    Entries that are not dicts, that have no ``type``, or whose ``text``
    yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) renderer must not abort alert extraction
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
649
c0ac49bc 650 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 651 errors = []
652 warnings = []
653 for alert_type, alert_message in alerts:
641ad5d8 654 if alert_type.lower() == 'error' and fatal:
109dd3b2 655 errors.append([alert_type, alert_message])
656 else:
657 warnings.append([alert_type, alert_message])
658
659 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 660 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 661 if errors:
662 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
663
664 def _extract_and_report_alerts(self, data, *args, **kwargs):
665 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
666
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels of ``renderer``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
674
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of ``path_list`` in ``data``.

    For each candidate item, ``simpleText`` is preferred; otherwise the
    ``text`` of its ``runs`` (or of the item itself when it is a list) is
    concatenated, limited to ``max_runs`` runs when given. With no paths,
    ``data`` itself is the only candidate. Returns None when nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a branching path yields a list of results; wrap a single result
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 696
def _get_count(self, data, *path_list):
    """Parse a count from the text found at ``path_list`` in ``data``.

    Falls back to the leading digits/commas of the whitespace-stripped text
    when ``parse_count`` cannot handle it.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)
704
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            height = int_or_none(entry.get('height'))
            width = int_or_none(entry.get('width'))
            results.append({'url': url, 'height': height, 'width': width})
    return results
728
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no relative time or it cannot be converted.
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    keyword = mobj.group('start')
    if keyword:
        return datetime_from_str(keyword)
    amount, unit = mobj.group('time'), mobj.group('unit')
    try:
        return datetime_from_str(f'now-{amount}{unit}')
    except ValueError:
        return None
744
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    A relative time ("3 days ago") is tried first, then an absolute date
    parsed from the whole text or from a date-looking part of it. A warning
    is issued when non-empty text yields no timestamp.
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(time_text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                time_text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text
763
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call an API endpoint via self._call_api, retrying on transient failures.

    The request is made once and then repeated up to `extractor_retries`
    (default 3) more times. A retry is attempted when:
      * the call fails with a network error other than HTTP 403/429
      * self._extract_and_report_alerts raises an error whose message
        contains "unknown error"
      * `check_get_keys` is non-empty and dict_get(response, check_get_keys)
        is falsy ("Incomplete data received")

    Once no further retry is possible, the failure is re-raised when `fatal`
    is true; otherwise it is reported as a warning and None is returned.

    @param item_id         ID passed to the API call as video_id and used for parsing error bodies
    @param query           Query passed through to self._call_api
    @param note            Status note; " (retry #N)" is appended on retries
    @param check_get_keys  Keys looked up in the response with dict_get; None/empty disables the check
    @param fatal           Whether an unrecoverable failure raises (True) or only warns (False)
    @returns               The API response, or None after a non-fatal failure
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Peek at the start of the error body; a non-HTML body may be
                    # JSON carrying an error message worth reporting
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}',
                                item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            # Only retry while attempts remain; on the last attempt the error must
            # be raised/warned below instead of silently returning the bad response
            if 'unknown error' in e.msg.lower() and count < retries:
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if count >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response
835
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/, else False."""
    # re.match anchors at the start of the string; a match object is always truthy
    return bool(re.match(r'https?://music\.youtube\.com/', url))
839
def _extract_video(self, renderer):
    """Build a `url`-type result (handled by YoutubeIE) from a video renderer dict."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # No usable length text: try the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time)
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    web_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', web_url) or ''
    # Shorts are recognised by the overlay style or by the navigation URL
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    # The upload date is only filled in when the youtubetab "approximate_date" argument is set
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges),
    }
894
0c148415 895
360e1ca5 896class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 897 IE_DESC = 'YouTube'
cb7dfeea 898 _VALID_URL = r"""(?x)^
c5e8d7af 899 (
edb53e2d 900 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 901 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
902 (?:www\.)?deturl\.com/www\.youtube\.com|
903 (?:www\.)?pwnyoutube\.com|
904 (?:www\.)?hooktube\.com|
905 (?:www\.)?yourepeat\.com|
906 tube\.majestyc\.net|
907 %(invidious)s|
908 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
909 (?:.*?\#/)? # handle anchor (#/) redirect urls
910 (?: # the various things that can precede the ID:
b6ce9bb0 911 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 912 |(?: # or the v= param in all its forms
f7000f3a 913 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 914 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 915 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
916 v=
917 )
f4b05232 918 ))
cbaed4bb
S
919 |(?:
920 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
921 vid\.plus| # or vid.plus/xxxx
922 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 923 %(invidious)s
cbaed4bb 924 )/
edb53e2d 925 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 926 )
c5e8d7af 927 )? # all until now is optional -> you can pass the naked ID
201c1459 928 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 929 (?(1).+)? # if we found the ID, everything can follow
9297939e 930 (?:\#|$)""" % {
d9190e44 931 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 932 }
e40c758c 933 _PLAYER_INFO_RE = (
cc2db878 934 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
935 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 936 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 937 )
2c62dc26 938 _formats = {
c2d3cb4c 939 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
940 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
941 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
942 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
943 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
944 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
946 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 947 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 948 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
949 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
950 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
952 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
953 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 954 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 955 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
956 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 957
958
959 # 3D videos
c2d3cb4c 960 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
961 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
962 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
963 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 964 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
965 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
966 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 967
96fb5605 968 # Apple HTTP Live Streaming
11f12195 969 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 970 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
972 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
973 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
974 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 975 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
976 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
977
978 # DASH mp4 video
d23028a8
S
979 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 984 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
985 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
989 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
990 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 991
f6f1fc92 992 # Dash mp4 audio
d23028a8
S
993 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
994 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
995 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
996 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
997 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
998 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
999 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1000
1001 # Dash webm
d23028a8
S
1002 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1006 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1007 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1008 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1009 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1015 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1017 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1018 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1020 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1021 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1022 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1024
1025 # Dash webm audio
d23028a8
S
1026 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1027 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1028
0857baad 1029 # Dash webm audio with opus inside
d23028a8
S
1030 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1031 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1032 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1033
ce6b9a2d
PH
1034 # RTMP (unnamed)
1035 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1036
1037 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1038 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1039 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1040 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1041 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1042 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1043 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1044 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1045 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1046 }
29f7c58a 1047 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1048
fd5c4aab
S
1049 _GEO_BYPASS = False
1050
78caa52a 1051 IE_NAME = 'youtube'
2eb88d95
PH
1052 _TESTS = [
1053 {
2d3d2997 1054 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1055 'info_dict': {
1056 'id': 'BaW_jenozKc',
1057 'ext': 'mp4',
3867038a 1058 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1059 'uploader': 'Philipp Hagemeister',
1060 'uploader_id': 'phihag',
ec85ded8 1061 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1062 'channel': 'Philipp Hagemeister',
dd4c4492
S
1063 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1064 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1065 'upload_date': '20121002',
ff9f925b 1066 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1067 'categories': ['Science & Technology'],
3867038a 1068 'tags': ['youtube-dl'],
556dbe7f 1069 'duration': 10,
dbdaaa23 1070 'view_count': int,
3e7c1224 1071 'like_count': int,
ff9f925b 1072 'availability': 'public',
1073 'playable_in_embed': True,
1074 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1075 'live_status': 'not_live',
1076 'age_limit': 0,
7c80519c 1077 'start_time': 1,
297a564b 1078 'end_time': 9,
6c73052c 1079 'channel_follower_count': int
2eb88d95 1080 }
0e853ca4 1081 },
fccd3771 1082 {
4bc3a23e
PH
1083 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1084 'note': 'Embed-only video (#1746)',
1085 'info_dict': {
1086 'id': 'yZIXLfi8CZQ',
1087 'ext': 'mp4',
1088 'upload_date': '20120608',
1089 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1090 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1091 'uploader': 'SET India',
94bfcd23 1092 'uploader_id': 'setindia',
ec85ded8 1093 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1094 'age_limit': 18,
545cc85d 1095 },
1096 'skip': 'Private video',
fccd3771 1097 },
11b56058 1098 {
8bdd16b4 1099 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1100 'note': 'Use the first video ID in the URL',
1101 'info_dict': {
1102 'id': 'BaW_jenozKc',
1103 'ext': 'mp4',
3867038a 1104 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1105 'uploader': 'Philipp Hagemeister',
1106 'uploader_id': 'phihag',
ec85ded8 1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1108 'channel': 'Philipp Hagemeister',
1109 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1110 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1111 'upload_date': '20121002',
976ae3ea 1112 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1113 'categories': ['Science & Technology'],
3867038a 1114 'tags': ['youtube-dl'],
556dbe7f 1115 'duration': 10,
dbdaaa23 1116 'view_count': int,
11b56058 1117 'like_count': int,
976ae3ea 1118 'availability': 'public',
1119 'playable_in_embed': True,
1120 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1121 'live_status': 'not_live',
1122 'age_limit': 0,
6c73052c 1123 'channel_follower_count': int
34a7de29
S
1124 },
1125 'params': {
1126 'skip_download': True,
1127 },
11b56058 1128 },
dd27fd17 1129 {
2d3d2997 1130 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1131 'note': '256k DASH audio (format 141) via DASH manifest',
1132 'info_dict': {
1133 'id': 'a9LDPn-MO4I',
1134 'ext': 'm4a',
1135 'upload_date': '20121002',
1136 'uploader_id': '8KVIDEO',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1138 'description': '',
1139 'uploader': '8KVIDEO',
1140 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1141 },
4bc3a23e
PH
1142 'params': {
1143 'youtube_include_dash_manifest': True,
1144 'format': '141',
4919603f 1145 },
de3c7fe0 1146 'skip': 'format 141 not served anymore',
dd27fd17 1147 },
8bdd16b4 1148 # DASH manifest with encrypted signature
1149 {
1150 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1151 'info_dict': {
1152 'id': 'IB3lcPjvWLA',
1153 'ext': 'm4a',
1154 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1155 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1156 'duration': 244,
1157 'uploader': 'AfrojackVEVO',
1158 'uploader_id': 'AfrojackVEVO',
1159 'upload_date': '20131011',
cc2db878 1160 'abr': 129.495,
976ae3ea 1161 'like_count': int,
1162 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1163 'playable_in_embed': True,
1164 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1165 'view_count': int,
1166 'track': 'The Spark',
1167 'live_status': 'not_live',
1168 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1169 'channel': 'Afrojack',
1170 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1171 'tags': 'count:19',
1172 'availability': 'public',
1173 'categories': ['Music'],
1174 'age_limit': 0,
1175 'alt_title': 'The Spark',
6c73052c 1176 'channel_follower_count': int
8bdd16b4 1177 },
1178 'params': {
1179 'youtube_include_dash_manifest': True,
1180 'format': '141/bestaudio[ext=m4a]',
1181 },
1182 },
65c2fde2 1183 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1184 {
65c2fde2 1185 'note': 'Embed allowed age-gate video',
2d3d2997 1186 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1187 'info_dict': {
1188 'id': 'HtVdAasjOgU',
1189 'ext': 'mp4',
1190 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1191 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1192 'duration': 142,
c522adb1
JMF
1193 'uploader': 'The Witcher',
1194 'uploader_id': 'WitcherGame',
ec85ded8 1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1196 'upload_date': '20140605',
34952f09 1197 'age_limit': 18,
976ae3ea 1198 'categories': ['Gaming'],
1199 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1200 'availability': 'needs_auth',
1201 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1202 'like_count': int,
1203 'channel': 'The Witcher',
1204 'live_status': 'not_live',
1205 'tags': 'count:17',
1206 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1207 'playable_in_embed': True,
1208 'view_count': int,
6c73052c 1209 'channel_follower_count': int
c522adb1
JMF
1210 },
1211 },
65c2fde2 1212 {
1213 'note': 'Age-gate video with embed allowed in public site',
1214 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1215 'info_dict': {
1216 'id': 'HsUATh_Nc2U',
1217 'ext': 'mp4',
1218 'title': 'Godzilla 2 (Official Video)',
1219 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1220 'upload_date': '20200408',
1221 'uploader_id': 'FlyingKitty900',
1222 'uploader': 'FlyingKitty',
1223 'age_limit': 18,
976ae3ea 1224 'availability': 'needs_auth',
1225 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1226 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1227 'channel': 'FlyingKitty',
1228 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1229 'view_count': int,
1230 'categories': ['Entertainment'],
1231 'live_status': 'not_live',
1232 'tags': ['Flyingkitty', 'godzilla 2'],
1233 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1234 'like_count': int,
1235 'duration': 177,
1236 'playable_in_embed': True,
6c73052c 1237 'channel_follower_count': int
65c2fde2 1238 },
1239 },
1240 {
1241 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1242 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1243 'info_dict': {
1244 'id': 'Tq92D6wQ1mg',
1245 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1246 'ext': 'mp4',
17322130 1247 'upload_date': '20191228',
65c2fde2 1248 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1249 'uploader': 'Projekt Melody',
1250 'description': 'md5:17eccca93a786d51bc67646756894066',
1251 'age_limit': 18,
976ae3ea 1252 'like_count': int,
1253 'availability': 'needs_auth',
1254 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1255 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1256 'view_count': int,
1257 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1258 'channel': 'Projekt Melody',
1259 'live_status': 'not_live',
1260 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1261 'playable_in_embed': True,
1262 'categories': ['Entertainment'],
1263 'duration': 106,
1264 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
6c73052c 1265 'channel_follower_count': int
65c2fde2 1266 },
1267 },
1268 {
1269 'note': 'Non-Agegated non-embeddable video',
1270 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1271 'info_dict': {
1272 'id': 'MeJVWBSsPAY',
1273 'ext': 'mp4',
1274 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1275 'uploader': 'Herr Lurik',
1276 'uploader_id': 'st3in234',
1277 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1278 'upload_date': '20130730',
976ae3ea 1279 'track': 'Such mich find mich',
1280 'age_limit': 0,
1281 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1282 'like_count': int,
1283 'playable_in_embed': False,
1284 'creator': 'OOMPH!',
1285 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1286 'view_count': int,
1287 'alt_title': 'Such mich find mich',
1288 'duration': 210,
1289 'channel': 'Herr Lurik',
1290 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1291 'categories': ['Music'],
1292 'availability': 'public',
1293 'uploader_url': 'http://www.youtube.com/user/st3in234',
1294 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1295 'live_status': 'not_live',
1296 'artist': 'OOMPH!',
6c73052c 1297 'channel_follower_count': int
65c2fde2 1298 },
1299 },
1300 {
1301 'note': 'Non-bypassable age-gated video',
1302 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1303 'only_matching': True,
1304 },
8bdd16b4 1305 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1306 # YouTube Red ad is not captured for creator
1307 {
1308 'url': '__2ABJjxzNo',
1309 'info_dict': {
1310 'id': '__2ABJjxzNo',
1311 'ext': 'mp4',
1312 'duration': 266,
1313 'upload_date': '20100430',
1314 'uploader_id': 'deadmau5',
1315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1316 'creator': 'deadmau5',
1317 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1318 'uploader': 'deadmau5',
1319 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1320 'alt_title': 'Some Chords',
976ae3ea 1321 'availability': 'public',
1322 'tags': 'count:14',
1323 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1324 'view_count': int,
1325 'live_status': 'not_live',
1326 'channel': 'deadmau5',
1327 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1328 'like_count': int,
1329 'track': 'Some Chords',
1330 'artist': 'deadmau5',
1331 'playable_in_embed': True,
1332 'age_limit': 0,
1333 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1334 'categories': ['Music'],
1335 'album': 'Some Chords',
6c73052c 1336 'channel_follower_count': int
8bdd16b4 1337 },
1338 'expected_warnings': [
1339 'DASH manifest missing',
1340 ]
1341 },
067aa17e 1342 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1343 {
1344 'url': 'lqQg6PlCWgI',
1345 'info_dict': {
1346 'id': 'lqQg6PlCWgI',
1347 'ext': 'mp4',
556dbe7f 1348 'duration': 6085,
90227264 1349 'upload_date': '20150827',
cbe2bd91 1350 'uploader_id': 'olympic',
ec85ded8 1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1352 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1353 'uploader': 'Olympics',
cbe2bd91 1354 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1355 'like_count': int,
1356 'release_timestamp': 1343767800,
1357 'playable_in_embed': True,
1358 'categories': ['Sports'],
1359 'release_date': '20120731',
1360 'channel': 'Olympics',
1361 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1362 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1363 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1364 'age_limit': 0,
1365 'availability': 'public',
1366 'live_status': 'was_live',
1367 'view_count': int,
1368 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1369 'channel_follower_count': int
cbe2bd91
PH
1370 },
1371 'params': {
1372 'skip_download': 'requires avconv',
e52a40ab 1373 }
cbe2bd91 1374 },
6271f1ca
PH
1375 # Non-square pixels
1376 {
1377 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1378 'info_dict': {
1379 'id': '_b-2C3KPAM0',
1380 'ext': 'mp4',
1381 'stretched_ratio': 16 / 9.,
556dbe7f 1382 'duration': 85,
6271f1ca
PH
1383 'upload_date': '20110310',
1384 'uploader_id': 'AllenMeow',
ec85ded8 1385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1386 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1387 'uploader': '孫ᄋᄅ',
6271f1ca 1388 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1389 'playable_in_embed': True,
1390 'channel': '孫ᄋᄅ',
1391 'age_limit': 0,
1392 'tags': 'count:11',
1393 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1394 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1395 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1396 'view_count': int,
1397 'categories': ['People & Blogs'],
1398 'like_count': int,
1399 'live_status': 'not_live',
1400 'availability': 'unlisted',
6c73052c 1401 'channel_follower_count': int
6271f1ca 1402 },
06b491eb
S
1403 },
1404 # url_encoded_fmt_stream_map is empty string
1405 {
1406 'url': 'qEJwOuvDf7I',
1407 'info_dict': {
1408 'id': 'qEJwOuvDf7I',
f57b7835 1409 'ext': 'webm',
06b491eb
S
1410 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1411 'description': '',
1412 'upload_date': '20150404',
1413 'uploader_id': 'spbelect',
1414 'uploader': 'Наблюдатели Петербурга',
1415 },
1416 'params': {
1417 'skip_download': 'requires avconv',
e323cf3f
S
1418 },
1419 'skip': 'This live event has ended.',
06b491eb 1420 },
067aa17e 1421 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1422 {
1423 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1424 'info_dict': {
1425 'id': 'FIl7x6_3R5Y',
eb6793ba 1426 'ext': 'webm',
da77d856
S
1427 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1428 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1429 'duration': 220,
da77d856
S
1430 'upload_date': '20150625',
1431 'uploader_id': 'dorappi2000',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1433 'uploader': 'dorappi2000',
eb6793ba 1434 'formats': 'mincount:31',
da77d856 1435 },
eb6793ba 1436 'skip': 'not actual anymore',
2ee8f5d8 1437 },
8a1a26ce
YCH
1438 # DASH manifest with segment_list
1439 {
1440 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1441 'md5': '8ce563a1d667b599d21064e982ab9e31',
1442 'info_dict': {
1443 'id': 'CsmdDsKjzN8',
1444 'ext': 'mp4',
17ee98e1 1445 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1446 'uploader': 'Airtek',
1447 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1448 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1449 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1450 },
1451 'params': {
1452 'youtube_include_dash_manifest': True,
1453 'format': '135', # bestvideo
be49068d
S
1454 },
1455 'skip': 'This live event has ended.',
2ee8f5d8 1456 },
cf7e015f
S
1457 {
1458 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1459 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1460 'info_dict': {
545cc85d 1461 'id': 'jvGDaLqkpTg',
1462 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1463 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1464 },
1465 'playlist': [{
1466 'info_dict': {
545cc85d 1467 'id': 'jvGDaLqkpTg',
cf7e015f 1468 'ext': 'mp4',
545cc85d 1469 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1470 'description': 'md5:e03b909557865076822aa169218d6a5d',
1471 'duration': 10643,
1472 'upload_date': '20161111',
1473 'uploader': 'Team PGP',
1474 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1475 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1476 },
1477 }, {
1478 'info_dict': {
545cc85d 1479 'id': '3AKt1R1aDnw',
cf7e015f 1480 'ext': 'mp4',
545cc85d 1481 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1482 'description': 'md5:e03b909557865076822aa169218d6a5d',
1483 'duration': 10991,
1484 'upload_date': '20161111',
1485 'uploader': 'Team PGP',
1486 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1487 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1488 },
1489 }, {
1490 'info_dict': {
545cc85d 1491 'id': 'RtAMM00gpVc',
cf7e015f 1492 'ext': 'mp4',
545cc85d 1493 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1494 'description': 'md5:e03b909557865076822aa169218d6a5d',
1495 'duration': 10995,
1496 'upload_date': '20161111',
1497 'uploader': 'Team PGP',
1498 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1499 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1500 },
1501 }, {
1502 'info_dict': {
545cc85d 1503 'id': '6N2fdlP3C5U',
cf7e015f 1504 'ext': 'mp4',
545cc85d 1505 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1506 'description': 'md5:e03b909557865076822aa169218d6a5d',
1507 'duration': 10990,
1508 'upload_date': '20161111',
1509 'uploader': 'Team PGP',
1510 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1512 },
1513 }],
1514 'params': {
1515 'skip_download': True,
1516 },
65c2fde2 1517 'skip': 'Not multifeed anymore',
cbaed4bb 1518 },
f9f49d87 1519 {
067aa17e 1520 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1521 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1522 'info_dict': {
1523 'id': 'gVfLd0zydlo',
1524 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1525 },
1526 'playlist_count': 2,
be49068d 1527 'skip': 'Not multifeed anymore',
f9f49d87 1528 },
cbaed4bb 1529 {
2d3d2997 1530 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1531 'only_matching': True,
0e49d9a6 1532 },
6d4fc66b 1533 {
2d3d2997 1534 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1535 'only_matching': True,
1536 },
0e49d9a6 1537 {
067aa17e 1538 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1539 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1540 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1541 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1542 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1543 'info_dict': {
1544 'id': 'lsguqyKfVQg',
1545 'ext': 'mp4',
1546 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1547 'alt_title': 'Dark Walk',
0e49d9a6 1548 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1549 'duration': 133,
0e49d9a6
LL
1550 'upload_date': '20151119',
1551 'uploader_id': 'IronSoulElf',
ec85ded8 1552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1553 'uploader': 'IronSoulElf',
11f9be09 1554 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1555 'track': 'Dark Walk',
1556 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1557 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1558 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1559 'categories': ['Film & Animation'],
1560 'view_count': int,
1561 'live_status': 'not_live',
1562 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1564 'tags': 'count:13',
1565 'availability': 'public',
1566 'channel': 'IronSoulElf',
1567 'playable_in_embed': True,
1568 'like_count': int,
1569 'age_limit': 0,
6c73052c 1570 'channel_follower_count': int
0e49d9a6
LL
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 },
61f92af1 1576 {
067aa17e 1577 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1578 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1579 'only_matching': True,
1580 },
313dfc45
LL
1581 {
1582 # Video with yt:stretch=17:0
1583 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1584 'info_dict': {
1585 'id': 'Q39EVAstoRM',
1586 'ext': 'mp4',
1587 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1588 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1589 'upload_date': '20151107',
1590 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1591 'uploader': 'CH GAMER DROID',
1592 },
1593 'params': {
1594 'skip_download': True,
1595 },
be49068d 1596 'skip': 'This video does not exist.',
313dfc45 1597 },
201c1459 1598 {
1599 # Video with incomplete 'yt:stretch=16:'
1600 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1601 'only_matching': True,
1602 },
7caf9830
S
1603 {
1604 # Video licensed under Creative Commons
1605 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1606 'info_dict': {
1607 'id': 'M4gD1WSo5mA',
1608 'ext': 'mp4',
1609 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1610 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1611 'duration': 721,
17322130 1612 'upload_date': '20150128',
7caf9830 1613 'uploader_id': 'BerkmanCenter',
ec85ded8 1614 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1615 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1616 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1617 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1618 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1619 'like_count': int,
1620 'age_limit': 0,
1621 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1622 'channel': 'The Berkman Klein Center for Internet & Society',
1623 'availability': 'public',
1624 'view_count': int,
1625 'categories': ['Education'],
1626 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1627 'live_status': 'not_live',
1628 'playable_in_embed': True,
6c73052c 1629 'channel_follower_count': int
7caf9830
S
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
fd050249
S
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1643 'duration': 4060,
17322130 1644 'upload_date': '20151120',
eb6793ba 1645 'uploader': 'Bernie Sanders',
fd050249 1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1648 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
6c73052c 1661 'channel_follower_count': int
fd050249
S
1662 },
1663 'params': {
1664 'skip_download': True,
1665 },
1666 },
040ac686
S
1667 {
1668 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1669 'only_matching': True,
7f29cf54
S
1670 },
1671 {
067aa17e 1672 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1673 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1674 'only_matching': True,
6496ccb4
S
1675 },
1676 {
1677 # Rental video preview
1678 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1679 'info_dict': {
1680 'id': 'uGpuVWrhIzE',
1681 'ext': 'mp4',
1682 'title': 'Piku - Trailer',
1683 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1684 'upload_date': '20150811',
1685 'uploader': 'FlixMatrix',
1686 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1688 'license': 'Standard YouTube License',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
eb6793ba 1693 'skip': 'This video is not available.',
022a5d66 1694 },
12afdc2a
S
1695 {
1696 # YouTube Red video with episode data
1697 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1698 'info_dict': {
1699 'id': 'iqKdEhx-dD4',
1700 'ext': 'mp4',
1701 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1702 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1703 'duration': 2085,
12afdc2a
S
1704 'upload_date': '20170118',
1705 'uploader': 'Vsauce',
1706 'uploader_id': 'Vsauce',
1707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1708 'series': 'Mind Field',
1709 'season_number': 1,
1710 'episode_number': 1,
976ae3ea 1711 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1712 'tags': 'count:12',
1713 'view_count': int,
1714 'availability': 'public',
1715 'age_limit': 0,
1716 'channel': 'Vsauce',
1717 'episode': 'Episode 1',
1718 'categories': ['Entertainment'],
1719 'season': 'Season 1',
1720 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1721 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'like_count': int,
1723 'playable_in_embed': True,
1724 'live_status': 'not_live',
6c73052c 1725 'channel_follower_count': int
12afdc2a
S
1726 },
1727 'params': {
1728 'skip_download': True,
1729 },
1730 'expected_warnings': [
1731 'Skipping DASH manifest',
1732 ],
1733 },
c7121fa7
S
1734 {
1735 # The following content has been identified by the YouTube community
1736 # as inappropriate or offensive to some audiences.
1737 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1738 'info_dict': {
1739 'id': '6SJNVb0GnPI',
1740 'ext': 'mp4',
1741 'title': 'Race Differences in Intelligence',
1742 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1743 'duration': 965,
1744 'upload_date': '20140124',
1745 'uploader': 'New Century Foundation',
1746 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1747 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1748 },
1749 'params': {
1750 'skip_download': True,
1751 },
545cc85d 1752 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1753 },
022a5d66
S
1754 {
1755 # itag 212
1756 'url': '1t24XAntNCY',
1757 'only_matching': True,
fd5c4aab
S
1758 },
1759 {
1760 # geo restricted to JP
1761 'url': 'sJL6WA-aGkQ',
1762 'only_matching': True,
1763 },
cd5a74a2
S
1764 {
1765 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1766 'only_matching': True,
1767 },
bc2ca1bb 1768 {
1769 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1770 'only_matching': True,
1771 },
1772 {
1773 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1774 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1775 'only_matching': True,
1776 },
825cd268
RA
1777 {
1778 # DRM protected
1779 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1780 'only_matching': True,
4fe54c12
S
1781 },
1782 {
1783 # Video with unsupported adaptive stream type formats
1784 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1785 'info_dict': {
1786 'id': 'Z4Vy8R84T1U',
1787 'ext': 'mp4',
1788 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1789 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1790 'duration': 433,
1791 'upload_date': '20130923',
1792 'uploader': 'Amelia Putri Harwita',
1793 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1795 'formats': 'maxcount:10',
1796 },
1797 'params': {
1798 'skip_download': True,
1799 'youtube_include_dash_manifest': False,
1800 },
5429d6a9 1801 'skip': 'not actual anymore',
5caabd3c 1802 },
1803 {
822b9d9c 1804 # Youtube Music Auto-generated description
5caabd3c 1805 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1806 'info_dict': {
1807 'id': 'MgNrAu2pzNs',
1808 'ext': 'mp4',
1809 'title': 'Voyeur Girl',
1810 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1811 'upload_date': '20190312',
5429d6a9
S
1812 'uploader': 'Stephen - Topic',
1813 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1814 'artist': 'Stephen',
1815 'track': 'Voyeur Girl',
1816 'album': 'it\'s too much love to know my dear',
1817 'release_date': '20190313',
1818 'release_year': 2019,
976ae3ea 1819 'alt_title': 'Voyeur Girl',
1820 'view_count': int,
1821 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1822 'playable_in_embed': True,
1823 'like_count': int,
1824 'categories': ['Music'],
1825 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1826 'channel': 'Stephen',
1827 'availability': 'public',
1828 'creator': 'Stephen',
1829 'duration': 169,
1830 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1831 'age_limit': 0,
1832 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1833 'tags': 'count:11',
1834 'live_status': 'not_live',
6c73052c 1835 'channel_follower_count': int
5caabd3c 1836 },
1837 'params': {
1838 'skip_download': True,
1839 },
1840 },
66b48727
RA
1841 {
1842 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1843 'only_matching': True,
1844 },
011e75e6
S
1845 {
1846 # invalid -> valid video id redirection
1847 'url': 'DJztXj2GPfl',
1848 'info_dict': {
1849 'id': 'DJztXj2GPfk',
1850 'ext': 'mp4',
1851 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1852 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1853 'upload_date': '20090125',
1854 'uploader': 'Prochorowka',
1855 'uploader_id': 'Prochorowka',
1856 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1857 'artist': 'Panjabi MC',
1858 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1859 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1860 },
1861 'params': {
1862 'skip_download': True,
1863 },
545cc85d 1864 'skip': 'Video unavailable',
ea74e00b
DP
1865 },
1866 {
1867 # empty description results in an empty string
1868 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1869 'info_dict': {
1870 'id': 'x41yOUIvK2k',
1871 'ext': 'mp4',
1872 'title': 'IMG 3456',
1873 'description': '',
1874 'upload_date': '20170613',
1875 'uploader_id': 'ElevageOrVert',
1876 'uploader': 'ElevageOrVert',
976ae3ea 1877 'view_count': int,
1878 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1879 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1880 'like_count': int,
1881 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1882 'tags': [],
1883 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1884 'availability': 'public',
1885 'age_limit': 0,
1886 'categories': ['Pets & Animals'],
1887 'duration': 7,
1888 'playable_in_embed': True,
1889 'live_status': 'not_live',
1890 'channel': 'ElevageOrVert',
6c73052c 1891 'channel_follower_count': int
ea74e00b
DP
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 },
a0566bbf 1897 {
29f7c58a 1898 # with '};' inside yt initial data (see [1])
1899 # see [2] for an example with '};' inside ytInitialPlayerResponse
1900 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1901 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1902 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1903 'info_dict': {
1904 'id': 'CHqg6qOn4no',
1905 'ext': 'mp4',
1906 'title': 'Part 77 Sort a list of simple types in c#',
1907 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1908 'upload_date': '20130831',
1909 'uploader_id': 'kudvenkat',
1910 'uploader': 'kudvenkat',
976ae3ea 1911 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1912 'like_count': int,
1913 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1914 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1915 'live_status': 'not_live',
1916 'categories': ['Education'],
1917 'availability': 'public',
1918 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1919 'tags': 'count:12',
1920 'playable_in_embed': True,
1921 'age_limit': 0,
1922 'view_count': int,
1923 'duration': 522,
1924 'channel': 'kudvenkat',
6c73052c 1925 'channel_follower_count': int
a0566bbf 1926 },
1927 'params': {
1928 'skip_download': True,
1929 },
1930 },
29f7c58a 1931 {
1932 # another example of '};' in ytInitialData
1933 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1934 'only_matching': True,
1935 },
1936 {
1937 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1938 'only_matching': True,
1939 },
545cc85d 1940 {
cc2db878 1941 # https://github.com/ytdl-org/youtube-dl/pull/28094
1942 'url': 'OtqTfy26tG0',
1943 'info_dict': {
1944 'id': 'OtqTfy26tG0',
1945 'ext': 'mp4',
1946 'title': 'Burn Out',
1947 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1948 'upload_date': '20141120',
1949 'uploader': 'The Cinematic Orchestra - Topic',
1950 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1951 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1952 'artist': 'The Cinematic Orchestra',
1953 'track': 'Burn Out',
1954 'album': 'Every Day',
976ae3ea 1955 'like_count': int,
1956 'live_status': 'not_live',
1957 'alt_title': 'Burn Out',
1958 'duration': 614,
1959 'age_limit': 0,
1960 'view_count': int,
1961 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1962 'creator': 'The Cinematic Orchestra',
1963 'channel': 'The Cinematic Orchestra',
1964 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1965 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1966 'availability': 'public',
1967 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1968 'categories': ['Music'],
1969 'playable_in_embed': True,
6c73052c 1970 'channel_follower_count': int
cc2db878 1971 },
1972 'params': {
1973 'skip_download': True,
1974 },
545cc85d 1975 },
bc2ca1bb 1976 {
1977 # controversial video, only works with bpctr when authenticated with cookies
1978 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1979 'only_matching': True,
1980 },
a1a7907b 1981 {
1982 # controversial video, requires bpctr/contentCheckOk
1983 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1984 'info_dict': {
1985 'id': 'SZJvDhaSDnc',
1986 'ext': 'mp4',
1987 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1988 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1989 'uploader': 'CBS Mornings',
11f9be09 1990 'uploader_id': 'CBSThisMorning',
a1a7907b 1991 'upload_date': '20140716',
976ae3ea 1992 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1993 'duration': 170,
1994 'categories': ['News & Politics'],
1995 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1996 'view_count': int,
1997 'channel': 'CBS Mornings',
1998 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1999 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2000 'age_limit': 18,
2001 'availability': 'needs_auth',
2002 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2003 'like_count': int,
2004 'live_status': 'not_live',
2005 'playable_in_embed': True,
6c73052c 2006 'channel_follower_count': int
a1a7907b 2007 }
2008 },
f7ad7160 2009 {
2010 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2011 'url': 'cBvYw8_A0vQ',
2012 'info_dict': {
2013 'id': 'cBvYw8_A0vQ',
2014 'ext': 'mp4',
2015 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2016 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2017 'upload_date': '20201120',
2018 'uploader': 'Walk around Japan',
2019 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2021 'duration': 1456,
2022 'categories': ['Travel & Events'],
2023 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2024 'view_count': int,
2025 'channel': 'Walk around Japan',
2026 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2027 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2028 'age_limit': 0,
2029 'availability': 'public',
2030 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2031 'live_status': 'not_live',
2032 'playable_in_embed': True,
6c73052c 2033 'channel_follower_count': int
f7ad7160 2034 },
2035 'params': {
2036 'skip_download': True,
2037 },
0fb983f6 2038 }, {
2039 # Has multiple audio streams
2040 'url': 'WaOKSUlf4TM',
2041 'only_matching': True
9297939e 2042 }, {
2043 # Requires Premium: has format 141 when requested using YTM url
2044 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2045 'only_matching': True
2046 }, {
120916da 2047 # multiple subtitles with same lang_code
2048 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2049 'only_matching': True,
109dd3b2 2050 }, {
2051 # Force use android client fallback
2052 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2053 'info_dict': {
2054 'id': 'YOelRv7fMxY',
11f9be09 2055 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2056 'ext': '3gp',
2057 'upload_date': '20210624',
2058 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2059 'uploader': 'colinfurze',
11f9be09 2060 'uploader_id': 'colinfurze',
109dd3b2 2061 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2062 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2063 'duration': 596,
2064 'categories': ['Entertainment'],
2065 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2066 'view_count': int,
2067 'channel': 'colinfurze',
2068 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2069 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2070 'age_limit': 0,
2071 'availability': 'public',
2072 'like_count': int,
2073 'live_status': 'not_live',
2074 'playable_in_embed': True,
6c73052c 2075 'channel_follower_count': int
109dd3b2 2076 },
2077 'params': {
2078 'format': '17', # 3gp format available on android
2079 'extractor_args': {'youtube': {'player_client': ['android']}},
2080 },
120916da 2081 },
109dd3b2 2082 {
2083 # Skip download of additional client configs (remix client config in this case)
2084 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2085 'only_matching': True,
2086 'params': {
2087 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2088 },
8fc54b12 2089 }, {
2090 # shorts
2091 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2092 'only_matching': True,
9222c381 2093 }, {
2094 'note': 'Storyboards',
2095 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2096 'info_dict': {
2097 'id': '5KLPxDtMqe8',
2098 'ext': 'mhtml',
2099 'format_id': 'sb0',
2100 'title': 'Your Brain is Plastic',
2101 'uploader_id': 'scishow',
2102 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2103 'upload_date': '20140324',
2104 'uploader': 'SciShow',
976ae3ea 2105 'like_count': int,
2106 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2107 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2108 'view_count': int,
2109 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2110 'playable_in_embed': True,
2111 'tags': 'count:12',
2112 'uploader_url': 'http://www.youtube.com/user/scishow',
2113 'availability': 'public',
2114 'channel': 'SciShow',
2115 'live_status': 'not_live',
2116 'duration': 248,
2117 'categories': ['Education'],
2118 'age_limit': 0,
6c73052c 2119 'channel_follower_count': int
9222c381 2120 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2121 }, {
2122 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2123 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2124 'info_dict': {
2125 'id': '2NUZ8W2llS4',
2126 'ext': 'mp4',
2127 'title': 'The NP that test your phone performance 🙂',
2128 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2129 'uploader': 'Leon Nguyen',
2130 'uploader_id': 'VNSXIII',
2131 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2132 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2133 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2134 'duration': 21,
2135 'view_count': int,
2136 'age_limit': 0,
2137 'categories': ['Gaming'],
2138 'tags': 'count:23',
2139 'playable_in_embed': True,
2140 'live_status': 'not_live',
2141 'upload_date': '20220103',
2142 'like_count': int,
2143 'availability': 'public',
2144 'channel': 'Leon Nguyen',
2145 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2146 'channel_follower_count': int
2147 }
2148 }, {
2149 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2150 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2151 'info_dict': {
2152 'id': 'mzZzzBU6lrM',
2153 'ext': 'mp4',
2154 'title': 'I Met GeorgeNotFound In Real Life...',
2155 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2156 'uploader': 'Quackity',
2157 'uploader_id': 'QuackityHQ',
2158 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2159 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2160 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2161 'duration': 955,
2162 'view_count': int,
2163 'age_limit': 0,
2164 'categories': ['Entertainment'],
2165 'tags': 'count:26',
2166 'playable_in_embed': True,
2167 'live_status': 'not_live',
2168 'release_timestamp': 1641172509,
2169 'release_date': '20220103',
2170 'upload_date': '20220103',
2171 'like_count': int,
2172 'availability': 'public',
2173 'channel': 'Quackity',
2174 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2175 'channel_follower_count': int
2176 }
2177 },
2178 { # continuous livestream. Microformat upload date should be preferred.
2179 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2180 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2181 'info_dict': {
2182 'id': 'kgx4WGK0oNU',
2183 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2184 'ext': 'mp4',
2185 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2186 'availability': 'public',
2187 'age_limit': 0,
2188 'release_timestamp': 1637975704,
2189 'upload_date': '20210619',
2190 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2191 'live_status': 'is_live',
2192 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2193 'uploader': '阿鲍Abao',
2194 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2195 'channel': 'Abao in Tokyo',
2196 'channel_follower_count': int,
2197 'release_date': '20211127',
2198 'tags': 'count:39',
2199 'categories': ['People & Blogs'],
2200 'like_count': int,
2201 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2202 'view_count': int,
2203 'playable_in_embed': True,
2204 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2205 },
2206 'params': {'skip_download': True}
6e634cbe 2207 }, {
2208 # Story. Requires specific player params to work.
2209 # Note: stories get removed after some period of time
ee27297f 2210 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2211 'info_dict': {
ee27297f 2212 'id': 'vv8qTUWmulI',
6e634cbe 2213 'ext': 'mp4',
ee27297f 2214 'availability': 'unlisted',
2215 'view_count': int,
2216 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2217 'upload_date': '20220526',
2218 'categories': ['Education'],
2219 'title': 'Story',
2220 'channel': 'IT\'S HISTORY',
2221 'description': '',
2222 'uploader_id': 'BlastfromthePast',
2223 'duration': 12,
2224 'uploader': 'IT\'S HISTORY',
6e634cbe 2225 'playable_in_embed': True,
6e634cbe 2226 'age_limit': 0,
6e634cbe 2227 'live_status': 'not_live',
ee27297f 2228 'tags': [],
2229 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2230 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2231 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2232 }
2233 }, {
2234 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2235 'info_dict': {
2236 'id': 'tjjjtzRLHvA',
2237 'ext': 'mp4',
2238 'title': 'ハッシュタグ無し };if window.ytcsi',
2239 'upload_date': '20220323',
2240 'like_count': int,
2241 'availability': 'unlisted',
2242 'channel': 'nao20010128nao',
2243 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2244 'age_limit': 0,
2245 'uploader': 'nao20010128nao',
2246 'uploader_id': 'nao20010128nao',
2247 'categories': ['Music'],
6e634cbe 2248 'view_count': int,
2249 'description': '',
ee27297f 2250 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2251 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2252 'live_status': 'not_live',
2253 'playable_in_embed': True,
2254 'channel_follower_count': int,
2255 'duration': 6,
2256 'tags': [],
2257 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2258 }
2259 }
2eb88d95
PH
2260 ]
2261
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string has a non-empty first ``list`` value is
    rejected outright; every other URL is decided by the parent class's
    ``suitable``.
    """
    # NOTE(review): parse_qs is also imported at module level; this local
    # import is presumably deliberate - confirm before removing it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2270
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create two empty caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.
    """
    super().__init__(*args, **kwargs)
    # Both caches start out empty and are filled elsewhere in the class.
    # NOTE(review): presumably keyed by player URL / signature id - confirm.
    self._code_cache, self._player_cache = {}, {}
e0df6211 2275
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn every ``is_from_start`` format into a fragment-generator format.

    Only the formats whose ``is_from_start`` entry is truthy are processed.
    Each of them is mutated in place: ``is_live`` is set to True,
    ``protocol`` to ``'http_dash_segments_generator'`` and ``fragments``
    to a partial of ``self._live_dash_fragments`` bound to the format id,
    *live_start_time* and a feed callback that can re-download the
    manifests on demand. Nothing is returned.
    """
    refresh_lock = threading.Lock()

    # State shared between the closures below (rebound via ``nonlocal``).
    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the formats once more than *delay* seconds have
        passed since the previous fetch; otherwise do nothing."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        # Restart the expiry clock only after a successful refresh.
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only one caller at a time may trigger a refresh.
        with refresh_lock:
            refetch_manifest(format_id, delay)

        matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if matched:
            return matched['manifest_url'], matched['manifest_stream_number'], is_live

        # The requested format is missing from the current format list.
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt.update({
            'is_live': True,
            'protocol': 'http_dash_segments_generator',
            'fragments': functools.partial(
                self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed),
        })
2319
2320 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2321 FETCH_SPAN, MAX_DURATION = 5, 432000
2322
2323 mpd_url, stream_number, is_live = None, None, True
2324
2325 begin_index = 0
2326 download_start_time = ctx.get('start') or time.time()
2327
2328 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2329 if lack_early_segments:
2330 self.report_warning(bug_reports_message(
2331 'Starting download from the last 120 hours of the live stream since '
2332 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2333 lack_early_segments = True
2334
2335 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2336 fragments, fragment_base_url = None, None
2337
a539f065 2338 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2339 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2340 # Obtain from MPD's maximum seq value
2341 old_mpd_url = mpd_url
185bf310 2342 last_error = ctx.pop('last_error', None)
a539f065 2343 expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
185bf310 2344 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2345 or (mpd_url, stream_number, False))
2346 if not refresh_sequence:
2347 if expire_fast and not is_live:
2348 return False, last_seq
2349 elif old_mpd_url == mpd_url:
2350 return True, last_seq
adbc4ec4
THD
2351 try:
2352 fmts, _ = self._extract_mpd_formats_and_subtitles(
2353 mpd_url, None, note=False, errnote=False, fatal=False)
2354 except ExtractorError:
2355 fmts = None
2356 if not fmts:
a539f065 2357 no_fragment_score += 2
adbc4ec4
THD
2358 return False, last_seq
2359 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2360 fragments = fmt_info['fragments']
2361 fragment_base_url = fmt_info['fragment_base_url']
2362 assert fragment_base_url
2363
2364 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2365 return True, _last_seq
2366
2367 while is_live:
2368 fetch_time = time.time()
2369 if no_fragment_score > 30:
2370 return
2371 if last_segment_url:
2372 # Obtain from "X-Head-Seqnum" header value from each segment
2373 try:
2374 urlh = self._request_webpage(
2375 last_segment_url, None, note=False, errnote=False, fatal=False)
2376 except ExtractorError:
2377 urlh = None
2378 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2379 if last_seq is None:
a539f065 2380 no_fragment_score += 2
adbc4ec4
THD
2381 last_segment_url = None
2382 continue
2383 else:
a539f065
LNO
2384 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2385 no_fragment_score += 2
185bf310 2386 if not should_continue:
adbc4ec4
THD
2387 continue
2388
2389 if known_idx > last_seq:
2390 last_segment_url = None
2391 continue
2392
2393 last_seq += 1
2394
2395 if begin_index < 0 and known_idx < 0:
2396 # skip from the start when it's negative value
2397 known_idx = last_seq + begin_index
2398 if lack_early_segments:
2399 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2400 try:
2401 for idx in range(known_idx, last_seq):
2402 # do not update sequence here or you'll get skipped some part of it
a539f065 2403 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2404 if not should_continue:
adbc4ec4
THD
2405 known_idx = idx - 1
2406 raise ExtractorError('breaking out of outer loop')
2407 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2408 yield {
2409 'url': last_segment_url,
36195c44 2410 'fragment_count': last_seq,
adbc4ec4
THD
2411 }
2412 if known_idx == last_seq:
2413 no_fragment_score += 5
2414 else:
2415 no_fragment_score = 0
2416 known_idx = last_seq
2417 except ExtractorError:
2418 continue
2419
2420 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2421
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the player JS URL found in the given ytcfgs, or None.

    The first string under 'PLAYER_JS_URL' or 'WEB_PLAYER_CONTEXT_CONFIGS' -> * -> 'jsUrl'
    is joined onto https://www.youtube.com. `webpage` is accepted but not used.
    """
    found_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', found_url) if found_url else None
109dd3b2 2429
b6de707d 2430 def _download_player_url(self, video_id, fatal=False):
2431 res = self._download_webpage(
2432 'https://www.youtube.com/iframe_api',
2433 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2434 if res:
2435 player_version = self._search_regex(
2436 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2437 if player_version:
2438 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2439
60064c53
PH
2440 def _signature_cache_id(self, example_sig):
2441 """ Return a string representation of a signature """
78caa52a 2442 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 2443
e40c758c
S
2444 @classmethod
2445 def _extract_player_info(cls, player_url):
2446 for player_re in cls._PLAYER_INFO_RE:
2447 id_m = re.search(player_re, player_url)
2448 if id_m:
2449 break
2450 else:
c081b35c 2451 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2452 return id_m.group('id')
e40c758c 2453
404f611f 2454 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2455 player_id = self._extract_player_info(player_url)
2456 if player_id not in self._code_cache:
1276a43a 2457 code = self._download_webpage(
109dd3b2 2458 player_url, video_id, fatal=fatal,
2459 note='Downloading player ' + player_id,
2460 errnote='Download of %s failed' % player_url)
1276a43a 2461 if code:
2462 self._code_cache[player_id] = code
404f611f 2463 return self._code_cache.get(player_id)
109dd3b2 2464
e40c758c 2465 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2466 player_id = self._extract_player_info(player_url)
e0df6211 2467
c4417ddb 2468 # Read from filesystem cache
86e5f3ed 2469 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2470 assert os.path.basename(func_id) == func_id
a0e07d31 2471
69ea8ca4 2472 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 2473 if cache_spec is not None:
78caa52a 2474 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2475
404f611f 2476 code = self._load_player(video_id, player_url)
2477 if code:
109dd3b2 2478 res = self._parse_sig_js(code)
e0df6211 2479
109dd3b2 2480 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2481 cache_res = res(test_string)
2482 cache_spec = [ord(c) for c in cache_res]
83799698 2483
109dd3b2 2484 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
2485 return res
83799698 2486
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set. `func` is
    applied to a probe string built from compat_chr(0) .. compat_chr(len(example_sig) - 1);
    the code points of the result are rendered as index/slice expressions on `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yield 's[...]' expressions for idxs, collapsing runs that move by
        # +1 or -1 into a single slice expression
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A stop of -1 would be read as "last element", so leave the stop open instead
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev: emit it and start afresh
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): `i` is only bound if idxs has at least two elements
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2528
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The returned callable takes the signature string and passes it to the
    extracted JS function as its single argument.
    """
    # Tried in order; the 'sig' group captures the function name
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return js_function([s])

    return run_signature_function
2552
545cc85d 2553 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2554 """Turn the encrypted s field into a working signature"""
c8bf86d5 2555 try:
62af3a0e 2556 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5 2557 if player_id not in self._player_cache:
52023f12 2558 func = self._extract_signature_function(video_id, player_url, s)
c8bf86d5
PH
2559 self._player_cache[player_id] = func
2560 func = self._player_cache[player_id]
404f611f 2561 self._print_sig_code(func, s)
c8bf86d5
PH
2562 return func(s)
2563 except Exception as e:
52023f12 2564 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2565
2566 def _decrypt_nsig(self, s, video_id, player_url):
2567 """Turn the encrypted n field into a working signature"""
2568 if player_url is None:
2569 raise ExtractorError('Cannot decrypt nsig without player_url')
60f393e4 2570 player_url = urljoin('https://www.youtube.com', player_url)
404f611f 2571
2572 sig_id = ('nsig_value', s)
2573 if sig_id in self._player_cache:
2574 return self._player_cache[sig_id]
2575
2576 try:
2577 player_id = ('nsig', player_url)
2578 if player_id not in self._player_cache:
2579 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2580 func = self._player_cache[player_id]
2581 self._player_cache[sig_id] = func(s)
2582 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2583 return self._player_cache[sig_id]
2584 except Exception as e:
aa9369a2 2585 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2586
2587 def _extract_n_function_name(self, jscode):
48416bc4 2588 nfunc, idx = self._search_regex(
c571b3a6 2589 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
48416bc4 2590 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2591 if not idx:
2592 return nfunc
2593 return json.loads(js_to_json(self._search_regex(
a7d4acc0 2594 rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
48416bc4 2595 f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
404f611f 2596
2597 def _extract_n_function(self, video_id, player_url):
2598 player_id = self._extract_player_info(player_url)
2599 func_code = self._downloader.cache.load('youtube-nsig', player_id)
2600
2601 if func_code:
2602 jsi = JSInterpreter(func_code)
2603 else:
2604 jscode = self._load_player(video_id, player_url)
2605 funcname = self._extract_n_function_name(jscode)
2606 jsi = JSInterpreter(jscode)
2607 func_code = jsi.extract_function_code(funcname)
2608 self._downloader.cache.store('youtube-nsig', player_id, func_code)
2609
2610 if self.get_param('youtube_print_sig_code'):
2611 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2612
2613 return lambda s: jsi.extract_function_from_code(*func_code)([s])
e0df6211 2614
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is preferred; otherwise the value is searched for in the player
    code. Without a player_url this raises ExtractorError if fatal, else warns
    and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2638
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URL from the player responses to mark the video as watched.

    Warns and returns when no such URL is found; the request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2664
@staticmethod
def _extract_urls(webpage):
    """
    Collect YouTube embeds from a webpage.

    Returns a list holding, in this order: embedded player URLs, lazyYT
    'data-youtube-id' values and 'data-video_id' values of the Wordpress
    "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE(review): the `embedSWF\(?:\s*` alternative reads as "optional '(' then a
    # literal ':'" - possibly a typo for a non-capturing group; confirm before changing
    embed_matches = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    entries = [unescapeHTML(match.group('url')) for match in embed_matches]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend([unescapeHTML(lazy_id) for lazy_id in lazy_ids])

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one
    entries.extend(groups[-1] for groups in importer_matches)

    return entries
2696
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2701
@classmethod
def extract_id(cls, url):
    """
    Return the 'id' group of cls._VALID_URL (matched in verbose mode) for the given URL.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2708
def _extract_chapters_from_json(self, data, duration):
    """
    Build chapters from the chaptered player bar renderer found in `data`.

    The start time is 'timeRangeStartMillis' passed through float_or_none with
    scale=1000; the title is the renderer's 'simpleText' title.
    """
    def start_time_of(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list, chapter_time=start_time_of, chapter_title=title_of, duration=duration)
2723
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro markers lists of the engagement panels in `data`.

    Returns the first non-empty result of _extract_chapters over those lists, or [].
    """
    def start_time_of(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_of(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            start_time_of, title_of, duration)
        if chapters:
            return chapters
    return []
7c365c21 2736
1890fc63 2737 def _extract_chapters_from_description(self, description, duration):
2738 return self._extract_chapters(
2739 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2740 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2741 duration=duration, strict=False)
84213ea8 2742
1890fc63 2743 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2744 if not duration:
2745 return
2746 chapter_list = [{
2747 'start_time': chapter_time(chapter),
2748 'title': chapter_title(chapter),
2749 } for chapter in chapter_list or []]
2750 if not strict:
2751 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2752
2753 chapters = [{'start_time': 0, 'title': '<Untitled>'}]
2754 for idx, chapter in enumerate(chapter_list):
2755 if chapter['start_time'] is None or not chapter['title']:
2756 self.report_warning(f'Incomplete chapter {idx}')
2757 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2758 chapters[-1]['end_time'] = chapter['start_time']
2759 chapters.append(chapter)
2760 else:
2761 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2762 chapters[-1]['end_time'] = duration
2763 return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
84213ea8 2764
a1c5d2ca
M
2765 def _extract_comment(self, comment_renderer, parent=None):
2766 comment_id = comment_renderer.get('commentId')
2767 if not comment_id:
2768 return
fe93e2c4 2769
052e1350 2770 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2771
49bd8c66 2772 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2773 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2774 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2775 author_id = try_get(comment_renderer,
2776 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2777
49bd8c66 2778 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2779 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2780 author_thumbnail = try_get(comment_renderer,
2781 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2782
2783 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2784 is_favorited = 'creatorHeart' in (try_get(
2785 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2786 return {
2787 'id': comment_id,
2788 'text': text,
d92f5d5a 2789 'timestamp': timestamp,
a1c5d2ca
M
2790 'time_text': time_text,
2791 'like_count': votes,
97524332 2792 'is_favorited': is_favorited,
a1c5d2ca
M
2793 'author': author,
2794 'author_id': author_id,
2795 'author_thumbnail': author_thumbnail,
2796 'author_is_uploader': author_is_uploader,
2797 'parent': parent or 'root'
2798 }
2799
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator of comment dicts, following continuations page by page.

    Calls itself for reply threads, with `parent` set to the parent comment id
    and the same `tracker` dict so that counts are shared across calls.
    Limits are read from the 'max_comments' extractor argument.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Record the expected comment count and return the continuation of the
        # requested sort order (None if no header provides one)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yield the comments of one page, each followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the consumer below treats as "stop"
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Capped by both the per-thread limit and the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable values fall back to sys.maxsize; max_comments itself is not used in this method
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is handled as the header only
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
2945
2946 @staticmethod
2947 def _generate_comment_continuation(video_id):
2948 """
2949 Generates initial comment section continuation token from given video id
2950 """
2951 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
2952 return base64.b64encode(token.encode()).decode()
2953
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section' renderer
    in `contents`, cut off after the first value of the 'max_comments' extractor
    argument when that parses as an integer. `webpage` is not used.
    """
    def iter_comments(section_contents):
        section_renderers = traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={})
        comment_section = next(
            (section for section in section_renderers
             if section.get('sectionIdentifier') == 'comment-item-section'),
            None)
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)
a1c5d2ca 2964
109dd3b2 2965 @staticmethod
99e9e001 2966 def _get_checkok_params():
2967 return {'contentCheckOk': True, 'racyCheckOk': True}
2968
2969 @classmethod
2970 def _generate_player_context(cls, sts=None):
109dd3b2 2971 context = {
2972 'html5Preference': 'HTML5_PREF_WANTS',
2973 }
2974 if sts is not None:
2975 context['signatureTimestamp'] = sts
2976 return {
2977 'playbackContext': {
2978 'contentPlaybackContext': context
a1a7907b 2979 },
99e9e001 2980 **cls._get_checkok_params()
109dd3b2 2981 }
2982
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age gate.

    True when 'desktopLegacyAgeGateReason' is set, or when the playability
    status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    status_and_reason = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for value in status_and_reason:
            if marker in value:
                return True
    return False
2994
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2998
99e9e001 2999 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 3000
11f9be09 3001 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
3002 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 3003 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 3004 headers = self.generate_api_headers(
99e9e001 3005 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 3006
6e634cbe 3007 yt_query = {
3008 'videoId': video_id,
3009 'params': '8AEB' # enable stories
3010 }
11f9be09 3011 yt_query.update(self._generate_player_context(sts))
3012 return self._extract_response(
3013 item_id=video_id, ep='player', query=yt_query,
379e44ed 3014 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 3015 default_client=client,
11f9be09 3016 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
3017 ) or None
3018
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the 'player_client' extractor argument into the list of clients to use.

    'default' and 'all' are expanded, unknown names are skipped with a warning and
    an empty selection falls back to the defaults. For music URLs the '_music'
    variant of every selected client is added when INNERTUBE_CLIENTS has one.
    The result is passed through orderedSet.
    """
    default_clients = ['android', 'web']
    # Client names starting with '_' are not selectable
    selectable_clients = sorted(
        (name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_'),
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    selected = []
    for name in self._configuration_arg('player_client'):
        if name in selectable_clients:
            selected.append(name)
        elif name == 'default':
            selected.extend(default_clients)
        elif name == 'all':
            selected.extend(selectable_clients)
        else:
            self.report_warning(f'Skipping unsupported client {name}')
    if not selected:
        selected = default_clients

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Kept as a generator over the list being extended, exactly as in the original
        selected.extend(
            f'{client}_music' for client in selected if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(selected)
cf7e015f 3042
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    Returns a tuple `(player_responses, player_url)`. Extra clients may be
    queued while iterating when a response is unplayable or age-gated.
    The most recent ExtractorError is re-raised only if no response at all
    was collected; otherwise errors are reported as warnings.
    """
    initial_pr = self._search_json(
        self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
        video_id, fatal=False) if webpage else None

    seen_clients = set(clients)
    # Reversed so that pop() hands out clients in the requested order
    queue = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in seen_clients:
                continue
            queue.append(candidate)
            seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        responses.append({**initial_pr, 'streamingData': None})

    pending_error = None
    tried_iframe_fallback = False
    player_url = None
    while queue:
        client, base_client, variant = _split_innertube_client(queue.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player, player_url = False, None

        # The iframe fallback is attempted at most once per video
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the latest error is kept; earlier ones become warnings
            if pending_error:
                self.report_warning(pending_error)
            pending_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if pending_error:
        if not responses:
            raise pending_error
        self.report_warning(pending_error)
    return responses, player_url
11f9be09 3117
def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts built from the given streamingData objects.

    Direct ("https") formats listed under `formats`/`adaptiveFormats` are
    yielded first, followed by the formats found in the HLS and DASH
    manifests referenced by each streamingData entry (subject to the
    `skip` extractor argument and the live/live-from-start settings).

    `duration` is only used to flag formats whose `approxDurationMs` is
    suspiciously small compared to it.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        # NOTE(review): presumably these are segmented live entries that
        # cannot be fetched as a plain URL - confirm
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track, hence the composite id
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value, not just a missing key
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format carries neither
                # `quality` nor `audioQuality`; guard the .replace() call
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format.

        Returns False when the itag was already seen for this protocol.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag via a different protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 3293
def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string: its first field is the URL
    template, every following field is a '#'-separated level description.
    Levels with an unexpected layout are skipped with a warning.
    """
    fields = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')
    base_url = url_or_none(urljoin('https://i.ytimg.com/', fields[0] or None))
    if not base_url:
        return

    # Remaining levels, walked from the last one in the spec to the first
    levels = fields[:0:-1]
    top_level = len(levels) - 1
    for i, args in enumerate(levels):
        args = args.split('#')
        counts = [int_or_none(value) for value in args[:5]]
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(top_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }
3328
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns `(webpage, master_ytcfg, player_responses, player_url)`;
    `webpage` is None when 'webpage' is listed in the `player_skip`
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3342
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine live status and build the format list.

    Returns `(live_broadcast_details, is_live, streaming_data, formats)`.
    `is_live` comes from the video details' `isLive`, falling back to
    `isLiveNow` of the live broadcast details when that is None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_flag = get_first(video_details, 'isLive')
    is_live = get_first(live_broadcast_details, 'isLiveNow') if live_flag is None else live_flag

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; materialise it for the caller
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live, duration)]

    return live_broadcast_details, is_live, streaming_data, formats
3353
3354 def _real_extract(self, url):
3355 url, smuggled_data = unsmuggle_url(url, {})
3356 video_id = self._match_id(url)
3357
3358 base_url = self.http_scheme() + '//www.youtube.com/'
3359 webpage_url = base_url + 'watch?v=' + video_id
3360
3361 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3362
11f9be09 3363 playability_statuses = traverse_obj(
3364 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3365
3366 trailer_video_id = get_first(
3367 playability_statuses,
3368 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3369 expected_type=str)
3370 if trailer_video_id:
3371 return self.url_result(
3372 trailer_video_id, self.ie_key(), trailer_video_id)
3373
3374 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3375 if webpage else (lambda x: None))
3376
3377 video_details = traverse_obj(
3378 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3379 microformats = traverse_obj(
3380 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3381 expected_type=dict, default=[])
3382 video_title = (
3383 get_first(video_details, 'title')
3384 or self._get_text(microformats, (..., 'title'))
3385 or search_meta(['og:title', 'twitter:title', 'title']))
3386 video_description = get_first(video_details, 'shortDescription')
3387
d89257f3 3388 multifeed_metadata_list = get_first(
3389 player_responses,
3390 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3391 expected_type=str)
3392 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3393 if self.get_param('noplaylist'):
11f9be09 3394 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3395 else:
3396 entries = []
3397 feed_ids = []
3398 for feed in multifeed_metadata_list.split(','):
3399 # Unquote should take place before split on comma (,) since textual
3400 # fields may contain comma as well (see
3401 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3402 feed_data = compat_parse_qs(
3403 compat_urllib_parse_unquote_plus(feed))
3404
3405 def feed_entry(name):
3406 return try_get(
3407 feed_data, lambda x: x[name][0], compat_str)
3408
3409 feed_id = feed_entry('id')
3410 if not feed_id:
3411 continue
3412 feed_title = feed_entry('title')
3413 title = video_title
3414 if feed_title:
3415 title += ' (%s)' % feed_title
3416 entries.append({
3417 '_type': 'url_transparent',
3418 'ie_key': 'Youtube',
3419 'url': smuggle_url(
3420 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3421 {'force_singlefeed': True}),
3422 'title': title,
3423 })
3424 feed_ids.append(feed_id)
3425 self.to_screen(
3426 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3427 % (', '.join(feed_ids), video_id))
3428 return self.playlist_result(
3429 entries, video_id, video_title, video_description)
11f9be09 3430
a1b2d843 3431 duration = int_or_none(
3432 get_first(video_details, 'lengthSeconds')
3433 or get_first(microformats, 'lengthSeconds')
3434 or parse_duration(search_meta('duration'))) or None
3435
829bbd1d 3436 if get_first(video_details, 'isPostLiveDvr'):
3437 self.write_debug('Video is in Post-Live Manifestless mode')
3438 if duration or 0 > 4 * 3600:
3439 self.report_warning(
3440 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3441 'This is a known issue and patches are welcome')
3442
a1b2d843 3443 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
3444 video_id, microformats, video_details, player_responses, player_url, duration)
bf1317d2 3445
545cc85d 3446 if not formats:
11f9be09 3447 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3448 self.report_drm(video_id)
11f9be09 3449 pemr = get_first(
3450 playability_statuses,
3451 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3452 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3453 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3454 if subreason:
545cc85d 3455 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3456 countries = get_first(microformats, 'availableCountries')
545cc85d 3457 if not countries:
3458 regions_allowed = search_meta('regionsAllowed')
3459 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3460 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3461 reason += f'. {subreason}'
545cc85d 3462 if reason:
b7da73eb 3463 self.raise_no_formats(reason, expected=True)
bf1317d2 3464
11f9be09 3465 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3466 if not keywords and webpage:
3467 keywords = [
3468 unescapeHTML(m.group('content'))
3469 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3470 for keyword in keywords:
3471 if keyword.startswith('yt:stretch='):
201c1459 3472 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3473 if mobj:
3474 # NB: float is intentional for forcing float division
3475 w, h = (float(v) for v in mobj.groups())
3476 if w > 0 and h > 0:
3477 ratio = w / h
3478 for f in formats:
3479 if f.get('vcodec') != 'none':
3480 f['stretched_ratio'] = ratio
3481 break
a709d873 3482 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3483 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3484 if thumbnail_url:
3485 thumbnails.append({
3486 'url': thumbnail_url,
ff2751ac 3487 })
fccf5021 3488 original_thumbnails = thumbnails.copy()
3489
0ba692ac 3490 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3491 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3492 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3493 thumbnail_names = [
bfec31be 3494 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3495 # in resolution, these are not the custom thumbnail. So de-prioritize them
3496 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3497 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3498 ]
cca80fe6 3499 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3500 thumbnails.extend({
3501 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3502 video_id=video_id, name=name, ext=ext,
3503 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3504 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3505 for thumb in thumbnails:
cca80fe6 3506 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3507 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3508 self._remove_duplicate_formats(thumbnails)
fccf5021 3509 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3510
7ea65411 3511 category = get_first(microformats, 'category') or search_meta('genre')
3512 channel_id = str_or_none(
3513 get_first(video_details, 'channelId')
3514 or get_first(microformats, 'externalChannelId')
3515 or search_meta('channelId'))
7ea65411 3516 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3517
3518 live_content = get_first(video_details, 'isLiveContent')
3519 is_upcoming = get_first(video_details, 'isUpcoming')
3520 if is_live is None:
3521 if is_upcoming or live_content is False:
3522 is_live = False
3523 if is_upcoming is None and (live_content or is_live):
3524 is_upcoming = False
adbc4ec4
THD
3525 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3526 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3527 if not duration and live_end_time and live_start_time:
3528 duration = live_end_time - live_start_time
3529
3530 if is_live and self.get_param('live_from_start'):
3531 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3532
720c3099 3533 formats.extend(self._extract_storyboard(player_responses, duration))
3534
3535 # Source is given priority since formats that throttle are given lower source_preference
3536 # When throttling issue is fully fixed, remove this
3537 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3538
545cc85d 3539 info = {
3540 'id': video_id,
39ca3b5c 3541 'title': video_title,
545cc85d 3542 'formats': formats,
3543 'thumbnails': thumbnails,
fccf5021 3544 # The best thumbnail that we are sure exists. Prevents unnecessary
3545 # URL checking if user don't care about getting the best possible thumbnail
3546 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3547 'description': video_description,
11f9be09 3548 'uploader': get_first(video_details, 'author'),
545cc85d 3549 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3550 'uploader_url': owner_profile_url,
3551 'channel_id': channel_id,
e0ddbd02 3552 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
545cc85d 3553 'duration': duration,
3554 'view_count': int_or_none(
11f9be09 3555 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3556 or search_meta('interactionCount')),
11f9be09 3557 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3558 'age_limit': 18 if (
11f9be09 3559 get_first(microformats, 'isFamilySafe') is False
545cc85d 3560 or search_meta('isFamilyFriendly') == 'false'
3561 or search_meta('og:restrictions:age') == '18+') else 0,
3562 'webpage_url': webpage_url,
3563 'categories': [category] if category else None,
3564 'tags': keywords,
11f9be09 3565 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3566 'is_live': is_live,
3567 'was_live': (False if is_live or is_upcoming or live_content is False
3568 else None if is_live is None or is_upcoming is None
3569 else live_content),
3570 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3571 'release_timestamp': live_start_time,
545cc85d 3572 }
b477fc13 3573
3944e7af 3574 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3575 if pctr:
ecdc9049 3576 def get_lang_code(track):
3577 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3578 or track.get('languageCode'))
3579
3580 # Converted into dicts to remove duplicates
3581 captions = {
3582 get_lang_code(sub): sub
3583 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3584 translation_languages = {
3585 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3586 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3587
774d79cc 3588 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3589 lang_subs = container.setdefault(lang_code, [])
545cc85d 3590 for fmt in self._SUBTITLE_FORMATS:
3591 query.update({
3592 'fmt': fmt,
3593 })
3594 lang_subs.append({
3595 'ext': fmt,
60f393e4 3596 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3597 'name': sub_name,
545cc85d 3598 })
7e72694b 3599
ecdc9049 3600 subtitles, automatic_captions = {}, {}
3601 for lang_code, caption_track in captions.items():
3602 base_url = caption_track.get('baseUrl')
1235d333 3603 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3604 if not base_url:
3605 continue
ecdc9049 3606 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3607 if caption_track.get('kind') != 'asr':
545cc85d 3608 if not lang_code:
3609 continue
3610 process_language(
ecdc9049 3611 subtitles, base_url, lang_code, lang_name, {})
3612 if not caption_track.get('isTranslatable'):
3613 continue
3944e7af 3614 for trans_code, trans_name in translation_languages.items():
3615 if not trans_code:
545cc85d 3616 continue
1235d333 3617 orig_trans_code = trans_code
ecdc9049 3618 if caption_track.get('kind') != 'asr':
18e49408 3619 if 'translated_subs' in self._configuration_arg('skip'):
3620 continue
ecdc9049 3621 trans_code += f'-{lang_code}'
3622 trans_name += format_field(lang_name, template=' from %s')
d49669ac 3623 # Add an "-orig" label to the original language so that it can be distinguished.
3624 # The subs are returned without "-orig" as well for compatibility
1235d333 3625 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3626 process_language(
d49669ac 3627 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3628 # Setting tlang=lang returns damaged subtitles.
d49669ac 3629 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3630 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
ecdc9049 3631 info['automatic_captions'] = automatic_captions
3632 info['subtitles'] = subtitles
7e72694b 3633
545cc85d 3634 parsed_url = compat_urllib_parse_urlparse(url)
3635 for component in [parsed_url.fragment, parsed_url.query]:
3636 query = compat_parse_qs(component)
3637 for k, v in query.items():
3638 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3639 d_k += '_time'
3640 if d_k not in info and k in s_ks:
3641 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3642
3643 # Youtube Music Auto-generated description
822b9d9c 3644 if video_description:
1890fc63 3645 mobj = re.search(
3646 r'''(?xs)
3647 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3648 (?P<album>[^\n]+)
3649 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3650 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3651 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3652 .+\nAuto-generated\ by\ YouTube\.\s*$
3653 ''', video_description)
822b9d9c 3654 if mobj:
822b9d9c
RA
3655 release_year = mobj.group('release_year')
3656 release_date = mobj.group('release_date')
3657 if release_date:
3658 release_date = release_date.replace('-', '')
3659 if not release_year:
545cc85d 3660 release_year = release_date[:4]
3661 info.update({
3662 'album': mobj.group('album'.strip()),
3663 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3664 'track': mobj.group('track').strip(),
3665 'release_date': release_date,
cc2db878 3666 'release_year': int_or_none(release_year),
545cc85d 3667 })
7e72694b 3668
545cc85d 3669 initial_data = None
3670 if webpage:
b7c47b74 3671 initial_data = self._search_json(
3672 self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', video_id, fatal=False)
545cc85d 3673 if not initial_data:
99e9e001 3674 query = {'videoId': video_id}
3675 query.update(self._get_checkok_params())
109dd3b2 3676 initial_data = self._extract_response(
3677 item_id=video_id, ep='next', fatal=False,
99e9e001 3678 ytcfg=master_ytcfg, query=query,
3679 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3680 note='Downloading initial data API JSON')
545cc85d 3681
19a03940 3682 try: # This will error if there is no livechat
c60ee3a2 3683 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3684 except (KeyError, IndexError, TypeError):
3685 pass
3686 else:
ecdc9049 3687 info.setdefault('subtitles', {})['live_chat'] = [{
19a03940 3688 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
c60ee3a2 3689 'video_id': video_id,
3690 'ext': 'json',
f6745c49 3691 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3692 }]
545cc85d 3693
3694 if initial_data:
7c365c21 3695 info['chapters'] = (
3696 self._extract_chapters_from_json(initial_data, duration)
3697 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3698 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3699 or None)
545cc85d 3700
17322130 3701 contents = traverse_obj(
3702 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3703 expected_type=list, default=[])
3704
3705 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3706 if vpir:
3707 stl = vpir.get('superTitleLink')
3708 if stl:
3709 stl = self._get_text(stl)
3710 if try_get(
3711 vpir,
3712 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3713 info['location'] = stl
3714 else:
affc4fef 3715 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3716 if mobj:
545cc85d 3717 info.update({
17322130 3718 'series': mobj.group(1),
3719 'season_number': int(mobj.group(2)),
3720 'episode_number': int(mobj.group(3)),
545cc85d 3721 })
17322130 3722 for tlb in (try_get(
3723 vpir,
3724 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3725 list) or []):
3726 tbr = tlb.get('toggleButtonRenderer') or {}
3727 for getter, regex in [(
3728 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3729 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3730 lambda x: x['accessibility'],
3731 lambda x: x['accessibilityData']['accessibilityData'],
3732 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3733 label = (try_get(tbr, getter, dict) or {}).get('label')
3734 if label:
3735 mobj = re.match(regex, label)
3736 if mobj:
3737 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3738 break
17322130 3739 sbr_tooltip = try_get(
3740 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3741 if sbr_tooltip:
3742 like_count, dislike_count = sbr_tooltip.split(' / ')
3743 info.update({
3744 'like_count': str_to_int(like_count),
3745 'dislike_count': str_to_int(dislike_count),
3746 })
3747 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3748 if vsir:
3749 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3750 info.update({
3751 'channel': self._get_text(vor, 'title'),
3752 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3753
3754 rows = try_get(
3755 vsir,
3756 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3757 list) or []
3758 multiple_songs = False
3759 for row in rows:
3760 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3761 multiple_songs = True
3762 break
3763 for row in rows:
3764 mrr = row.get('metadataRowRenderer') or {}
3765 mrr_title = mrr.get('title')
3766 if not mrr_title:
3767 continue
3768 mrr_title = self._get_text(mrr, 'title')
3769 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3770 if mrr_title == 'License':
3771 info['license'] = mrr_contents_text
3772 elif not multiple_songs:
3773 if mrr_title == 'Album':
3774 info['album'] = mrr_contents_text
3775 elif mrr_title == 'Artist':
3776 info['artist'] = mrr_contents_text
3777 elif mrr_title == 'Song':
3778 info['track'] = mrr_contents_text
545cc85d 3779
3780 fallbacks = {
3781 'channel': 'uploader',
3782 'channel_id': 'uploader_id',
3783 'channel_url': 'uploader_url',
3784 }
992f9a73 3785
17322130 3786 # The upload date for scheduled, live and past live streams / premieres in microformats
3787 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3788 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3789 upload_date = (
3790 unified_strdate(get_first(microformats, 'uploadDate'))
3791 or unified_strdate(search_meta('uploadDate')))
3792 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3793 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3794 info['upload_date'] = upload_date
992f9a73 3795
545cc85d 3796 for to, frm in fallbacks.items():
3797 if not info.get(to):
3798 info[to] = info.get(frm)
3799
3800 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3801 v = info.get(s_k)
3802 if v:
3803 info[d_k] = v
b84071c0 3804
11f9be09 3805 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3806 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3807 is_membersonly = None
b28f8d24 3808 is_premium = None
c224251a
M
3809 if initial_data and is_private is not None:
3810 is_membersonly = False
b28f8d24 3811 is_premium = False
47193e02 3812 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3813 badge_labels = set()
3814 for content in contents:
3815 if not isinstance(content, dict):
3816 continue
3817 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3818 for badge_label in badge_labels:
3819 if badge_label.lower() == 'members only':
3820 is_membersonly = True
3821 elif badge_label.lower() == 'premium':
3822 is_premium = True
3823 elif badge_label.lower() == 'unlisted':
3824 is_unlisted = True
c224251a 3825
c224251a
M
3826 info['availability'] = self._availability(
3827 is_private=is_private,
b28f8d24 3828 needs_premium=is_premium,
c224251a
M
3829 needs_subscription=is_membersonly,
3830 needs_auth=info['age_limit'] >= 18,
3831 is_unlisted=None if is_private is None else is_unlisted)
3832
a2160aa4 3833 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3834
11f9be09 3835 self.mark_watched(video_id, player_responses)
d77ab8e2 3836
545cc85d 3837 return info
c5e8d7af 3838
a61fd4cf 3839
a6213a49 3840class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3841
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to the entries of a result.

    The decorated ``func`` is invoked as ``func(self, url, smuggled_data)``,
    where both values come from ``unsmuggle_url``. If any smuggled data is
    present and the returned info dict has entries, each entry's URL gets the
    same data smuggled back into it (lazily, as the entries are consumed).
    """

    def _resmuggle(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Only look at the entries when there is something to pass through
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper
3861
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by the meta tags of ``webpage``.

    The ``channelId`` meta tag is preferred. If it is absent, the ID is
    parsed out of the first channel URL found in the Open Graph / app-link /
    Twitter meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: `([^/?#&])+` would capture
    # only the final character of the ID instead of the whole ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3874
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first recognised renderer dict contained in ``item``.

    A value qualifies when it is a dict and its key is either one of the
    known basic renderer names or has the form ``grid...Renderer``.
    Returns None when nothing qualifies.
    """
    # Modified from _extract_grid_item_renderer
    basic_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
    )

    def _is_basic(name):
        return name in basic_keys or (name.startswith('grid') and name.endswith('Renderer'))

    return next(
        (value for name, value in item.items()
         if isinstance(value, dict) and _is_basic(name)),
        None)
8bdd16b4 3888
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    Each item is classified by the first identifier present in its renderer:
    a playlist ID, a video ID, a channel ID, or - failing all of those - a
    navigation endpoint URL that one of the YouTube extractors accepts.
    Items that match none of these are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')
        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # The first extractor that accepts the URL wins
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3928
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a music.youtube.com URL result.

    Checked in order: a plain video, a playlist (optionally starting at a
    given video), then a browse page. Returns None if none is present.
    """
    def lookup(*path):
        return traverse_obj(renderer, path)

    video_id = lookup('playlistItemData', 'videoId')
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = lookup('navigationEndpoint', 'watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = lookup('navigationEndpoint', 'watchEndpoint', 'videoId')
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = lookup('navigationEndpoint', 'browseEndpoint', 'browseId')
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
3946
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in a shelf's ``content``.

    Only ``gridRenderer`` / ``expandedShelfContentsRenderer`` content is
    handled; anything else yields nothing.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid = content.get('gridRenderer')
        if not grid:
            grid = content.get('expandedShelfContentsRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            yield from self._grid_entries(grid)
        # TODO: 'horizontalListRenderer' content is not extracted yet
8bdd16b4 3961
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, followed by its inline entries.

    With ``skip_channels`` set, a shelf whose URL points at a channels
    listing produces nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3977
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a video ID.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are accepted.
    """
    for content in video_list_renderer['contents']:
        if isinstance(content, dict):
            renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
            if isinstance(renderer, dict) and renderer.get('videoId'):
                yield self._extract_video(renderer)
07aeced6 3989
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3997
def _video_entry(self, video_renderer):
    """Return the extracted video for ``video_renderer``, or None if it has no video ID."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 4002
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None when it carries no URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4009
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every YouTube
    video linked from the post text (except the attached video itself).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    attached_video = self._extract_video(video_renderer) if video_id else None
    if attached_video:
        yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the video that was already handled as the attachment
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4045
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of a community-post continuation.

    Each item of ``post_thread_continuation['contents']`` is either a post
    thread (expanded through ``_post_thread_entries``) or a bare video.
    Items that are not dicts, and videos for which ``_video_entry`` returns
    nothing, are skipped so that callers never receive ``None`` entries.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Guard against malformed items; `.get` below needs a dict
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            entry = self._video_entry(renderer)
            # _video_entry returns None for renderers without a video ID
            if entry:
                yield entry
07aeced6 4058
39ed931e 4059 r''' # unused
4060 def _rich_grid_entries(self, contents):
4061 for content in contents:
4062 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4063 if video_renderer:
4064 entry = self._video_entry(video_renderer)
4065 if entry:
4066 yield entry
4067 '''
52efa4b3 4068
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an out-parameter: it is reset in place to
    ``[None]`` and, as entries are produced, its single slot is set to the
    continuation extracted from the most specific renderer that has one
    (item renderer, then section renderer, then ``parent_renderer``).
    """
    dispatch = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                if key in dispatch:
                    yield from filter(None, dispatch[key](renderer))
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4117
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The entries embedded in the tab are yielded first. Afterwards, as long
    as a continuation is available, the next page is requested and parsed
    either from ``continuationContents`` or from the appended continuation
    items of the response. Paging stops when there is no continuation, no
    response, or nothing recognisable in the response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is resolved at call time, so the fresh list
        # bound before each call below is the one that gets filled in
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Maps the renderer type of the first continuation item to
    # (handler, key under which the handler expects the item list)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in known_continuation_renderers:
                continuation_renderer = value
                continuation_list = [None]
                yield from known_continuation_renderers[key](continuation_renderer)
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in known_renderers:
                handler, items_key = known_renderers[key]
                video_items_renderer = {items_key: continuation_items}
                continuation_list = [None]
                yield from handler(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if not video_items_renderer:
            break
9558dcec 4192
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab marked ``selected``.

    If no tab is selected, raises ExtractorError when ``fatal`` is set and
    returns None otherwise.
    """
    for tab in tabs:
        renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if renderer.get('selected') is True:
            return renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
b82f815f 4202
def _extract_uploader(self, data):
    """Return the playlist owner's ``uploader*`` fields from the secondary sidebar.

    Only fields that could be determined are included; the result is an
    empty dict when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Collaborative playlists read "by <name> and N others"
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 4218
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    Metadata is taken from the channel metadata renderer when present and
    from the playlist metadata renderer otherwise; the entries are produced
    lazily by ``_entries`` for the selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab name (and expanded text, if any) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader information was found
    metadata.update(
        channel=metadata['uploader'],
        channel_id=metadata['uploader_id'],
        channel_url=metadata['uploader_url'])

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 4310
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    Each page is requested from the ``next`` endpoint, starting from the
    last video of the previous page. Videos up to and including the last
    one already yielded are skipped, and iteration ends when a page has no
    videos, brings nothing new, or the response contains no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item is not necessarily a video renderer with a watch
        # endpoint; fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist in the response there is nothing left to page through
        if not playlist:
            return
cd7c66cf 4341
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    When the playlist links to a different, viewable playlist page, the
    extraction is delegated to that URL; otherwise the inline playlist is
    extracted directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = bool(playlist_url) and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4364
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            # Only the first selected entry is considered
            selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_private = is_unlisted = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4396
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty ``info_renderer`` among the playlist sidebar items.

    Returns None if the sidebar is missing or no item holds such a renderer
    of ``expected_type``.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4405
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar renderer. If the
    button itself is not found, default request parameters are used.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4441
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTab ``skip`` extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4445
def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and return ``(webpage, initial data)``, retrying when needed.

    A download is retried (up to the ``extractor_retries`` parameter) after
    network errors other than HTTP 403/429 and when the initial data looks
    incomplete. When ``fatal`` is false, errors are reported as warnings
    and whatever was obtained so far is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    count = -1
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % count if count else ''
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            retryable = isinstance(e.cause, network_exceptions) and (
                not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(e.cause or e.msg)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only when the download and parsing above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if count >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
4491
a25bca9f 4492 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4493 """Use if failed to extract ytcfg (and data) from initial webpage"""
4494 if not ytcfg and self.is_authenticated:
4495 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4496 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4497 raise ExtractorError(
4498 f'{msg}. If you are not downloading private content, or '
4499 'your cookies are only for the first account and channel,'
4500 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4501 expected=True)
4502 self.report_warning(msg, only_once=True)
4503
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the initial data and ytcfg for a tab/playlist URL.

    Unless the webpage is skipped, data is taken from the downloaded webpage
    (downloaded with `fatal=webpage_fatal`). If that yields no data, the
    tab endpoint API is queried instead.

    @returns  (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        # No usable webpage data: fall back to the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4523
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the `navigation/resolve_url` API and fetch the endpoint it maps to.

    A resolved `browseEndpoint` is requested from `browse`, a `watchEndpoint`
    from `next`; the first one present wins and its response is returned.
    If neither is present, raises ExtractorError when `fatal`, otherwise
    reports a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_query = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_query:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_query, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4541
# Default `params` value for search requests; used when the caller does not
# pass `params`. A falsy value means no 'params' key is sent.
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for `query`, following continuations page by page.

    @param params  Value for the request's 'params' field; defaults to
                   `self._SEARCH_PARAMS`. Omitted from the request when falsy.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # De-duplicated top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list: passed to _extract_entries and read back here to get the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        payload.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4576
4577
4578class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4579 IE_DESC = 'YouTube Tabs'
4580 _VALID_URL = r'''(?x:
4581 https?://
4582 (?:\w+\.)?
4583 (?:
4584 youtube(?:kids)?\.com|
4585 %(invidious)s
4586 )/
4587 (?:
4588 (?P<channel_type>channel|c|user|browse)/|
4589 (?P<not_channel>
4590 feed/|hashtag/|
4591 (?:playlist|watch)\?.*?\blist=
4592 )|
4593 (?!(?:%(reserved_names)s)\b) # Direct URLs
4594 )
4595 (?P<id>[^/?\#&]+)
4596 )''' % {
4597 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4598 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4599 }
4600 IE_NAME = 'youtube:tab'
4601
4602 _TESTS = [{
4603 'note': 'playlists, multipage',
4604 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4605 'playlist_mincount': 94,
4606 'info_dict': {
4607 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4608 'title': 'Igor Kleiner - Playlists',
a6213a49 4609 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4610 'uploader': 'Igor Kleiner',
a6213a49 4611 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4612 'channel': 'Igor Kleiner',
4613 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4614 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4615 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4616 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4617 'channel_follower_count': int
a6213a49 4618 },
4619 }, {
4620 'note': 'playlists, multipage, different order',
4621 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4622 'playlist_mincount': 94,
4623 'info_dict': {
4624 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4625 'title': 'Igor Kleiner - Playlists',
a6213a49 4626 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4627 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4628 'uploader': 'Igor Kleiner',
4629 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4630 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4631 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4632 'channel': 'Igor Kleiner',
4633 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4634 'channel_follower_count': int
a6213a49 4635 },
4636 }, {
4637 'note': 'playlists, series',
4638 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4639 'playlist_mincount': 5,
4640 'info_dict': {
4641 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4642 'title': '3Blue1Brown - Playlists',
4643 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4644 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4645 'uploader': '3Blue1Brown',
976ae3ea 4646 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4647 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4648 'channel': '3Blue1Brown',
4649 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4650 'tags': ['Mathematics'],
6c73052c 4651 'channel_follower_count': int
a6213a49 4652 },
4653 }, {
4654 'note': 'playlists, singlepage',
4655 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4656 'playlist_mincount': 4,
4657 'info_dict': {
4658 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4659 'title': 'ThirstForScience - Playlists',
4660 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4661 'uploader': 'ThirstForScience',
4662 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4663 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4664 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4665 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4666 'tags': 'count:13',
4667 'channel': 'ThirstForScience',
6c73052c 4668 'channel_follower_count': int
a6213a49 4669 }
4670 }, {
4671 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4672 'only_matching': True,
4673 }, {
4674 'note': 'basic, single video playlist',
4675 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4676 'info_dict': {
4677 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4678 'uploader': 'Sergey M.',
4679 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4680 'title': 'youtube-dl public playlist',
976ae3ea 4681 'description': '',
4682 'tags': [],
4683 'view_count': int,
4684 'modified_date': '20201130',
4685 'channel': 'Sergey M.',
4686 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4687 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4688 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4689 },
4690 'playlist_count': 1,
4691 }, {
4692 'note': 'empty playlist',
4693 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4694 'info_dict': {
4695 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4696 'uploader': 'Sergey M.',
4697 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4698 'title': 'youtube-dl empty playlist',
976ae3ea 4699 'tags': [],
4700 'channel': 'Sergey M.',
4701 'description': '',
4702 'modified_date': '20160902',
4703 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4704 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4705 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4706 },
4707 'playlist_count': 0,
4708 }, {
4709 'note': 'Home tab',
4710 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4711 'info_dict': {
4712 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4713 'title': 'lex will - Home',
4714 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4715 'uploader': 'lex will',
4716 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4717 'channel': 'lex will',
4718 'tags': ['bible', 'history', 'prophesy'],
4719 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4720 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4721 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4722 'channel_follower_count': int
a6213a49 4723 },
4724 'playlist_mincount': 2,
4725 }, {
4726 'note': 'Videos tab',
4727 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4728 'info_dict': {
4729 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4730 'title': 'lex will - Videos',
4731 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4732 'uploader': 'lex will',
4733 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4734 'tags': ['bible', 'history', 'prophesy'],
4735 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4736 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4737 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4738 'channel': 'lex will',
6c73052c 4739 'channel_follower_count': int
a6213a49 4740 },
4741 'playlist_mincount': 975,
4742 }, {
4743 'note': 'Videos tab, sorted by popular',
4744 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4745 'info_dict': {
4746 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4747 'title': 'lex will - Videos',
4748 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4749 'uploader': 'lex will',
4750 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4751 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4752 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4753 'channel': 'lex will',
4754 'tags': ['bible', 'history', 'prophesy'],
4755 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4756 'channel_follower_count': int
a6213a49 4757 },
4758 'playlist_mincount': 199,
4759 }, {
4760 'note': 'Playlists tab',
4761 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4762 'info_dict': {
4763 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4764 'title': 'lex will - Playlists',
4765 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4766 'uploader': 'lex will',
4767 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4768 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4769 'channel': 'lex will',
4770 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4771 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4772 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4773 'channel_follower_count': int
a6213a49 4774 },
4775 'playlist_mincount': 17,
4776 }, {
4777 'note': 'Community tab',
4778 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4779 'info_dict': {
4780 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4781 'title': 'lex will - Community',
4782 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4783 'uploader': 'lex will',
4784 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4785 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4786 'channel': 'lex will',
4787 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4788 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4789 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4790 'channel_follower_count': int
a6213a49 4791 },
4792 'playlist_mincount': 18,
4793 }, {
4794 'note': 'Channels tab',
4795 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4796 'info_dict': {
4797 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4798 'title': 'lex will - Channels',
4799 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4800 'uploader': 'lex will',
4801 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4802 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4803 'channel': 'lex will',
4804 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4805 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4806 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4807 'channel_follower_count': int
a6213a49 4808 },
4809 'playlist_mincount': 12,
4810 }, {
4811 'note': 'Search tab',
4812 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4813 'playlist_mincount': 40,
4814 'info_dict': {
4815 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4816 'title': '3Blue1Brown - Search - linear algebra',
4817 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4818 'uploader': '3Blue1Brown',
4819 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4820 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4821 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4822 'tags': ['Mathematics'],
4823 'channel': '3Blue1Brown',
4824 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4825 'channel_follower_count': int
a6213a49 4826 },
4827 }, {
4828 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4829 'only_matching': True,
4830 }, {
4831 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4832 'only_matching': True,
4833 }, {
4834 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4835 'only_matching': True,
4836 }, {
4837 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4838 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4839 'info_dict': {
4840 'title': '29C3: Not my department',
4841 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4842 'uploader': 'Christiaan008',
4843 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4844 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4845 'tags': [],
4846 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4847 'view_count': int,
4848 'modified_date': '20150605',
4849 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4850 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4851 'channel': 'Christiaan008',
a6213a49 4852 },
4853 'playlist_count': 96,
4854 }, {
4855 'note': 'Large playlist',
4856 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4857 'info_dict': {
4858 'title': 'Uploads from Cauchemar',
4859 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4860 'uploader': 'Cauchemar',
4861 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4862 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4863 'tags': [],
4864 'modified_date': r're:\d{8}',
4865 'channel': 'Cauchemar',
4866 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4867 'view_count': int,
4868 'description': '',
4869 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4870 },
4871 'playlist_mincount': 1123,
976ae3ea 4872 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4873 }, {
4874 'note': 'even larger playlist, 8832 videos',
4875 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4876 'only_matching': True,
4877 }, {
4878 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4879 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4880 'info_dict': {
4881 'title': 'Uploads from Interstellar Movie',
4882 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4883 'uploader': 'Interstellar Movie',
4884 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4885 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4886 'tags': [],
4887 'view_count': int,
4888 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4889 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4890 'channel': 'Interstellar Movie',
4891 'description': '',
4892 'modified_date': r're:\d{8}',
a6213a49 4893 },
4894 'playlist_mincount': 21,
4895 }, {
4896 'note': 'Playlist with "show unavailable videos" button',
4897 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4898 'info_dict': {
4899 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4900 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4901 'uploader': 'Phim Siêu Nhân Nhật Bản',
4902 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4903 'view_count': int,
4904 'channel': 'Phim Siêu Nhân Nhật Bản',
4905 'tags': [],
4906 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4907 'description': '',
4908 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4909 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4910 'modified_date': r're:\d{8}',
a6213a49 4911 },
4912 'playlist_mincount': 200,
976ae3ea 4913 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4914 }, {
4915 'note': 'Playlist with unavailable videos in page 7',
4916 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4917 'info_dict': {
4918 'title': 'Uploads from BlankTV',
4919 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4920 'uploader': 'BlankTV',
4921 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 4922 'channel': 'BlankTV',
4923 'channel_url': 'https://www.youtube.com/c/blanktv',
4924 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4925 'view_count': int,
4926 'tags': [],
4927 'uploader_url': 'https://www.youtube.com/c/blanktv',
4928 'modified_date': r're:\d{8}',
4929 'description': '',
a6213a49 4930 },
4931 'playlist_mincount': 1000,
976ae3ea 4932 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4933 }, {
4934 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4935 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4936 'info_dict': {
4937 'title': 'Data Analysis with Dr Mike Pound',
4938 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4939 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4940 'uploader': 'Computerphile',
4941 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 4942 'uploader_url': 'https://www.youtube.com/user/Computerphile',
4943 'tags': [],
4944 'view_count': int,
4945 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4946 'channel_url': 'https://www.youtube.com/user/Computerphile',
4947 'channel': 'Computerphile',
a6213a49 4948 },
4949 'playlist_mincount': 11,
4950 }, {
4951 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4952 'only_matching': True,
4953 }, {
4954 'note': 'Playlist URL that does not actually serve a playlist',
4955 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4956 'info_dict': {
4957 'id': 'FqZTN594JQw',
4958 'ext': 'webm',
4959 'title': "Smiley's People 01 detective, Adventure Series, Action",
4960 'uploader': 'STREEM',
4961 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4962 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4963 'upload_date': '20150526',
4964 'license': 'Standard YouTube License',
4965 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4966 'categories': ['People & Blogs'],
4967 'tags': list,
4968 'view_count': int,
4969 'like_count': int,
a6213a49 4970 },
4971 'params': {
4972 'skip_download': True,
4973 },
4974 'skip': 'This video is not available.',
4975 'add_ie': [YoutubeIE.ie_key()],
4976 }, {
4977 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4978 'only_matching': True,
4979 }, {
4980 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4981 'only_matching': True,
4982 }, {
4983 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4984 'info_dict': {
6c73052c 4985 'id': 'GgL890LIznQ', # This will keep changing
a6213a49 4986 'ext': 'mp4',
976ae3ea 4987 'title': str,
a6213a49 4988 'uploader': 'Sky News',
4989 'uploader_id': 'skynews',
4990 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4991 'upload_date': r're:\d{8}',
976ae3ea 4992 'description': str,
a6213a49 4993 'categories': ['News & Politics'],
4994 'tags': list,
4995 'like_count': int,
6c73052c 4996 'release_timestamp': 1642502819,
976ae3ea 4997 'channel': 'Sky News',
4998 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4999 'age_limit': 0,
5000 'view_count': int,
6c73052c 5001 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5002 'playable_in_embed': True,
6c73052c 5003 'release_date': '20220118',
976ae3ea 5004 'availability': 'public',
5005 'live_status': 'is_live',
5006 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5007 'channel_follower_count': int
a6213a49 5008 },
5009 'params': {
5010 'skip_download': True,
5011 },
976ae3ea 5012 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5013 }, {
5014 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5015 'info_dict': {
5016 'id': 'a48o2S1cPoo',
5017 'ext': 'mp4',
5018 'title': 'The Young Turks - Live Main Show',
5019 'uploader': 'The Young Turks',
5020 'uploader_id': 'TheYoungTurks',
5021 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5022 'upload_date': '20150715',
5023 'license': 'Standard YouTube License',
5024 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5025 'categories': ['News & Politics'],
5026 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5027 'like_count': int,
a6213a49 5028 },
5029 'params': {
5030 'skip_download': True,
5031 },
5032 'only_matching': True,
5033 }, {
5034 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5035 'only_matching': True,
5036 }, {
5037 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5038 'only_matching': True,
5039 }, {
5040 'note': 'A channel that is not live. Should raise error',
5041 'url': 'https://www.youtube.com/user/numberphile/live',
5042 'only_matching': True,
5043 }, {
5044 'url': 'https://www.youtube.com/feed/trending',
5045 'only_matching': True,
5046 }, {
5047 'url': 'https://www.youtube.com/feed/library',
5048 'only_matching': True,
5049 }, {
5050 'url': 'https://www.youtube.com/feed/history',
5051 'only_matching': True,
5052 }, {
5053 'url': 'https://www.youtube.com/feed/subscriptions',
5054 'only_matching': True,
5055 }, {
5056 'url': 'https://www.youtube.com/feed/watch_later',
5057 'only_matching': True,
5058 }, {
5059 'note': 'Recommended - redirects to home page.',
5060 'url': 'https://www.youtube.com/feed/recommended',
5061 'only_matching': True,
5062 }, {
5063 'note': 'inline playlist with not always working continuations',
5064 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5065 'only_matching': True,
5066 }, {
5067 'url': 'https://www.youtube.com/course',
5068 'only_matching': True,
5069 }, {
5070 'url': 'https://www.youtube.com/zsecurity',
5071 'only_matching': True,
5072 }, {
5073 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5074 'only_matching': True,
5075 }, {
5076 'url': 'https://www.youtube.com/TheYoungTurks/live',
5077 'only_matching': True,
5078 }, {
5079 'url': 'https://www.youtube.com/hashtag/cctv9',
5080 'info_dict': {
5081 'id': 'cctv9',
5082 'title': '#cctv9',
976ae3ea 5083 'tags': [],
a6213a49 5084 },
5085 'playlist_mincount': 350,
5086 }, {
5087 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5088 'only_matching': True,
5089 }, {
5090 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5091 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5092 'only_matching': True
5093 }, {
5094 'note': '/browse/ should redirect to /channel/',
5095 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5096 'only_matching': True
5097 }, {
5098 'note': 'VLPL, should redirect to playlist?list=PL...',
5099 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5100 'info_dict': {
5101 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5102 'uploader': 'NoCopyrightSounds',
5103 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5104 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5105 'title': 'NCS Releases',
976ae3ea 5106 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5107 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5108 'modified_date': r're:\d{8}',
5109 'view_count': int,
5110 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5111 'tags': [],
5112 'channel': 'NoCopyrightSounds',
a6213a49 5113 },
5114 'playlist_mincount': 166,
976ae3ea 5115 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5116 }, {
5117 'note': 'Topic, should redirect to playlist?list=UU...',
5118 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5119 'info_dict': {
5120 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5121 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5122 'title': 'Uploads from Royalty Free Music - Topic',
5123 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5124 'tags': [],
5125 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5126 'channel': 'Royalty Free Music - Topic',
5127 'view_count': int,
5128 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5129 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5130 'modified_date': r're:\d{8}',
5131 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5132 'description': '',
a6213a49 5133 },
5134 'expected_warnings': [
a6213a49 5135 'The URL does not have a videos tab',
976ae3ea 5136 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5137 ],
5138 'playlist_mincount': 101,
5139 }, {
5140 'note': 'Topic without a UU playlist',
5141 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5142 'info_dict': {
5143 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5144 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5145 'tags': [],
a6213a49 5146 },
5147 'expected_warnings': [
976ae3ea 5148 'the playlist redirect gave error',
a6213a49 5149 ],
5150 'playlist_mincount': 9,
5151 }, {
5152 'note': 'Youtube music Album',
5153 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5154 'info_dict': {
5155 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5156 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5157 'tags': [],
5158 'view_count': int,
5159 'description': '',
5160 'availability': 'unlisted',
5161 'modified_date': r're:\d{8}',
a6213a49 5162 },
5163 'playlist_count': 50,
5164 }, {
5165 'note': 'unlisted single video playlist',
5166 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5167 'info_dict': {
5168 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5169 'uploader': 'colethedj',
5170 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5171 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5172 'availability': 'unlisted',
5173 'tags': [],
5174 'modified_date': '20211208',
5175 'channel': 'colethedj',
5176 'view_count': int,
5177 'description': '',
5178 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5179 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5180 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5181 },
5182 'playlist_count': 1,
5183 }, {
5184 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5185 'url': 'https://www.youtube.com/feed/recommended',
5186 'info_dict': {
5187 'id': 'recommended',
5188 'title': 'recommended',
6c73052c 5189 'tags': [],
a6213a49 5190 },
5191 'playlist_mincount': 50,
5192 'params': {
5193 'skip_download': True,
5194 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5195 },
5196 }, {
5197 'note': 'API Fallback: /videos tab, sorted by oldest first',
5198 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5199 'info_dict': {
5200 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5201 'title': 'Cody\'sLab - Videos',
5202 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5203 'uploader': 'Cody\'sLab',
5204 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5205 'channel': 'Cody\'sLab',
5206 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5207 'tags': [],
5208 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5209 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5210 'channel_follower_count': int
a6213a49 5211 },
5212 'playlist_mincount': 650,
5213 'params': {
5214 'skip_download': True,
5215 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5216 },
5217 }, {
5218 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5219 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5220 'info_dict': {
5221 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5222 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5223 'title': 'Uploads from Royalty Free Music - Topic',
5224 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5225 'modified_date': r're:\d{8}',
5226 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5227 'description': '',
5228 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5229 'tags': [],
5230 'channel': 'Royalty Free Music - Topic',
5231 'view_count': int,
5232 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5233 },
5234 'expected_warnings': [
976ae3ea 5235 'does not have a videos tab',
5236 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5237 ],
5238 'playlist_mincount': 101,
5239 'params': {
5240 'skip_download': True,
5241 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5242 },
7c219ea6 5243 }, {
5244 'note': 'non-standard redirect to regional channel',
5245 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5246 'only_matching': True
61d3665d 5247 }, {
5248 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5249 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5250 'info_dict': {
5251 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5252 'modified_date': '20220407',
5253 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5254 'tags': [],
5255 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5256 'uploader': 'pukkandan',
5257 'availability': 'unlisted',
5258 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5259 'channel': 'pukkandan',
5260 'description': 'Test for collaborative playlist',
5261 'title': 'yt-dlp test - collaborative playlist',
5262 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5263 },
5264 'playlist_mincount': 2
a6213a49 5265 }]
5266
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; everything else is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5270
# Splits a matched URL into three named parts: `pre` (everything matched by
# _VALID_URL), an optional `tab` path segment (only attempted when the
# `not_channel` group did not participate) and `post` (the remainder).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Normalise a tab/channel/playlist URL and dispatch to the right extraction path.

    The URL is rewritten onto www.youtube.com, music-specific IDs are mapped
    to their playlist/channel equivalents, bare channel URLs are pointed at
    `/videos`, and the downloaded page data is then handed to
    `_extract_from_tabs`, `_extract_from_playlist`, or turned into a single
    video `url_result`.

    `smuggled_data` is read like a dict (`.get('is_music_url')`); presumably
    it is supplied by the `passthrough_smuggled_data` decorator - confirm
    against its definition.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved to a playlist, when `/live` was requested but no
    redirect to a video happened, or when an explicitly requested tab does
    not exist.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _URL_RE, with non-participating groups replaced by ''
        # so that callers can use string methods on them unconditionally
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Restart extraction on the canonical URL of the album
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted;
    # this decides further down whether a missing tab is an error or a fallback
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly modified) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is addressed as /featured in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # strip the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was substituted above, not requested by the user:
                    # fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: the page describes a single video
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5402
c5e8d7af 5403
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and any youtube/invidious URL carrying a
    # `list=` parameter, then hands the playlist over to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-scope import kept on purpose; presumably this
        # method must also work where the module-level import is unavailable
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry the original query over; a bare ID has none, so build one
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5513
5514
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist (`list=`) parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5562
5563
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=<id> embeds; the channel id is the match id
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Point the embed at the channel's /live page and let YoutubeTabIE handle it."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5577
5578
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's /videos page
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate the shorthand into the user's /videos URL for YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5593
b05654f0 5594
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (and its spelling variants) -> the "LL" playlist
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {'url': ':ytfav', 'only_matching': True},
        {'url': ':ytfavorites', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5612
5613
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ":ytnotif" keyword: pages through the notification menu endpoint and
    # yields one url-type entry per notification that can be resolved.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {'url': ':ytnotif', 'only_matching': True},
        {'url': ':ytnotifications', 'only_matching': True},
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one menu response.

        `continuation_list[0]` is used as an out-parameter: it is reset to
        None and then set to the last `continuationItemRenderer` found.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_token_holder = item.get('continuationItemRenderer')
            if next_token_holder:
                continuation_list[0] = next_token_holder

    def _extract_notification_renderer(self, notification):
        """Build a url-type result for one notification.

        Returns None when the notification carries neither a video id nor a
        channel id together with a post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            # remove soft hyphens
            notification_title = notification_title.replace('\xad', '')
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, notification_title, 'video title', default=None)

        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every menu page, following continuations until none is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # visitor data is taken from the previous page's response (None on page 1)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return all notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5703
5704
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch[N]:<query>" keyword search
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {'id': 'youtube-dl test video', 'title': 'youtube-dl test video'},
    }]
b05654f0 5718
a61fd4cf 5719
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate[N]:<query>" keyword search
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {'id': 'youtube-dl test video', 'title': 'youtube-dl test video'},
    }]
75dff0ee 5733
c9ae7b95 5734
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # youtube.com/results (or /search) URLs; the query is read from the
    # `search_query` or `q` parameter and the filter from `sp`.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by the URL; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5773
5774
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # music.youtube.com/search URLs. A result section can be chosen either
    # with an explicit `sp` parameter or by name in the URL fragment.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # section name (as written in the URL fragment) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search YouTube Music, optionally restricted to one result section.

        The playlist id/title is the query, suffixed with ' - <section>' when
        a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Explicit `sp`: show the section name if known, else the raw value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: try to interpret the URL fragment as a section name
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5826
5827
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to override _FEED_NAME; it determines both the
    extractor name and the /feed/<name> URL that is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrow the login check from the YouTube base extractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        # Derived from the feed name
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5846
5847
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword -> the "WL" playlist
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{'url': ':ytwatchlater', 'only_matching': True}]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5860
5861
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base-class value of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 5877
1ed5b5c9 5878
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" keyword (and its variants) -> /feed/subscriptions
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 5890
1ed5b5c9 5891
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" keyword -> /feed/history
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]
1ed5b5c9
JMF
5900
5901
class YoutubeStoriesIE(InfoExtractor):
    # "ytstories:UC<channel id>" shorthand. The part of the channel id after
    # "UC" is prefixed with "RLTD" to form the playlist id that is requested.
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the channel's stories playlist URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key string, as every other redirecting extractor
        # in this file does, instead of the extractor class itself
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5916
5917
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single
    # non-video parameter (typically because an unquoted "&" was swallowed
    # by the shell) and reports a helpful error instead of extracting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command lines name this program (yt-dlp), so that the
        # advice can be copied and run as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
5965
5966
class YoutubeClipIE(InfoExtractor):
    # Placeholder for /clip/ URLs: warns and hands the URL to the generic extractor
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported, then delegate the same URL to 'Generic'."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
5975
5976
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v=` value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)
772fd5cc 5991 expected=True)