]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cleanup] Minor fixes
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
c26f9b99 5import enum
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
720c3099 9import math
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
46383212 13import sys
f8271158 14import threading
8a784c74 15import time
e0df6211 16import traceback
14f25df2 17import urllib.error
ac668111 18import urllib.parse
c5e8d7af 19
b05654f0 20from .common import InfoExtractor, SearchInfoExtractor
25836db6 21from .openload import PhantomJSwrapper
14f25df2 22from ..compat import functools
545cc85d 23from ..jsinterp import JSInterpreter
4bb4a188 24from ..utils import (
f8271158 25 NO_DEFAULT,
26 ExtractorError,
693f0600 27 UserNotLive,
720c3099 28 bug_reports_message,
82d02080 29 classproperty,
c5e8d7af 30 clean_html,
d92f5d5a 31 datetime_from_str,
11f9be09 32 dict_get,
7a32c70d 33 filter_dict,
2d30521a 34 float_or_none,
11f9be09 35 format_field,
ff91cf74 36 get_first,
dd27fd17 37 int_or_none,
641ad5d8 38 is_html,
34921b43 39 join_nonempty,
48416bc4 40 js_to_json,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
4dfbf869 48 parse_qs,
dca3ff4a 49 qualities,
3995d37d 50 remove_start,
cf7e015f 51 smuggle_url,
dbdaaa23 52 str_or_none,
c93d53f5 53 str_to_int,
f3aa3c3f 54 strftime_or_none,
7c365c21 55 traverse_obj,
556dbe7f 56 try_get,
c5e8d7af
PH
57 unescapeHTML,
58 unified_strdate,
f0d785d3 59 unified_timestamp,
cf7e015f 60 unsmuggle_url,
8bdd16b4 61 update_url_query,
21c340b8 62 url_or_none,
fe93e2c4 63 urljoin,
7c365c21 64 variadic,
c5e8d7af
PH
65)
66
# Built-in Innertube client configurations, keyed by client name.
# Each entry holds the request context (client name/version and, for the app
# clients, SDK/device details and a user agent), the numeric client id that is
# sent as the X-YouTube-Client-Name header, and optionally an API key, an API
# host and a REQUIRE_JS_PLAYER flag. build_innertube_clients() fills in the
# entries that are missing and assigns each client a priority.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    # The only client in this table with its own API host
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # All android and ios clients below set REQUIRE_JS_PLAYER to False
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key here: build_innertube_clients() supplies the default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key here: build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
239
240
e7870111
D
241def _split_innertube_client(client_name):
242 variant, *base = client_name.rsplit('.', 1)
243 if base:
244 return variant, base[0], variant
245 base, *variant = client_name.split('_', 1)
246 return client_name, base, variant[0] if variant else None
247
248
def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    Fills in the default API key, host, REQUIRE_JS_PLAYER flag and ``hl``
    language, computes a ``priority`` from the base client, and registers an
    additional ``<client>_embedscreen`` entry for each client whose name has
    no variant part.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new "_embedscreen" entries are added while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            config.setdefault(key, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Derive the embed-screen flavour from the fully defaulted base config
            clone = copy.deepcopy(config)
            clone['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            clone['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            clone['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = clone
275
276
277build_innertube_clients()
278
279
class BadgeType(enum.Enum):
    """Badge categories produced by ``_extract_badges``."""

    # Values are the ones enum.auto() would assign (1, 2, 3, ...)
    AVAILABILITY_UNLISTED = 1
    AVAILABILITY_PRIVATE = 2
    AVAILABILITY_PUBLIC = 3
    AVAILABILITY_PREMIUM = 4
    AVAILABILITY_SUBSCRIPTION = 5
    LIVE_NOW = 6
287
288
de7f3446 289class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 290 """Provide base functions for Youtube extractors"""
e00eb564 291
3462ffa8 292 _RESERVED_NAMES = (
3cd786db 293 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 294 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 295 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 296 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 297
3619f78d 298 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
299
52efa4b3 300 # _NETRC_MACHINE = 'youtube'
3619f78d 301
b2e8bc1b
JMF
302 # If True it will raise an error if no login info is provided
303 _LOGIN_REQUIRED = False
304
d9190e44
RH
305 _INVIDIOUS_SITES = (
306 # invidious-redirect websites
307 r'(?:www\.)?redirect\.invidious\.io',
308 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 309 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
310 r'(?:www\.)?invidious\.pussthecat\.org',
311 r'(?:www\.)?invidious\.zee\.li',
312 r'(?:www\.)?invidious\.ethibox\.fr',
313 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
314 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
315 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
316 # youtube-dl invidious instances list
317 r'(?:(?:www|no)\.)?invidiou\.sh',
318 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
319 r'(?:www\.)?invidious\.kabi\.tk',
320 r'(?:www\.)?invidious\.mastodon\.host',
321 r'(?:www\.)?invidious\.zapashcanon\.fr',
322 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
323 r'(?:www\.)?invidious\.tinfoil-hat\.net',
324 r'(?:www\.)?invidious\.himiko\.cloud',
325 r'(?:www\.)?invidious\.reallyancient\.tech',
326 r'(?:www\.)?invidious\.tube',
327 r'(?:www\.)?invidiou\.site',
328 r'(?:www\.)?invidious\.site',
329 r'(?:www\.)?invidious\.xyz',
330 r'(?:www\.)?invidious\.nixnet\.xyz',
331 r'(?:www\.)?invidious\.048596\.xyz',
332 r'(?:www\.)?invidious\.drycat\.fr',
333 r'(?:www\.)?inv\.skyn3t\.in',
334 r'(?:www\.)?tube\.poal\.co',
335 r'(?:www\.)?tube\.connect\.cafe',
336 r'(?:www\.)?vid\.wxzm\.sx',
337 r'(?:www\.)?vid\.mint\.lgbt',
338 r'(?:www\.)?vid\.puffyan\.us',
339 r'(?:www\.)?yewtu\.be',
340 r'(?:www\.)?yt\.elukerio\.org',
341 r'(?:www\.)?yt\.lelux\.fi',
342 r'(?:www\.)?invidious\.ggc-project\.de',
343 r'(?:www\.)?yt\.maisputain\.ovh',
344 r'(?:www\.)?ytprivate\.com',
345 r'(?:www\.)?invidious\.13ad\.de',
346 r'(?:www\.)?invidious\.toot\.koeln',
347 r'(?:www\.)?invidious\.fdn\.fr',
348 r'(?:www\.)?watch\.nettohikari\.com',
349 r'(?:www\.)?invidious\.namazso\.eu',
350 r'(?:www\.)?invidious\.silkky\.cloud',
351 r'(?:www\.)?invidious\.exonip\.de',
352 r'(?:www\.)?invidious\.riverside\.rocks',
353 r'(?:www\.)?invidious\.blamefran\.net',
354 r'(?:www\.)?invidious\.moomoo\.de',
355 r'(?:www\.)?ytb\.trom\.tf',
356 r'(?:www\.)?yt\.cyberhost\.uk',
357 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
358 r'(?:www\.)?qklhadlycap4cnod\.onion',
359 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
360 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
361 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
362 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
363 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
364 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
365 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
366 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
367 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
368 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
369 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
370 r'(?:www\.)?piped\.kavin\.rocks',
371 r'(?:www\.)?piped\.silkky\.cloud',
372 r'(?:www\.)?piped\.tokhmi\.xyz',
373 r'(?:www\.)?piped\.moomoo\.me',
374 r'(?:www\.)?il\.ax',
375 r'(?:www\.)?piped\.syncpundit\.com',
376 r'(?:www\.)?piped\.mha\.fi',
377 r'(?:www\.)?piped\.mint\.lgbt',
378 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
379 )
380
c26f9b99 381 # extracted from account/account_menu ep
382 # XXX: These are the supported YouTube UI and API languages,
383 # which is slightly different from languages supported for translation in YouTube studio
384 _SUPPORTED_LANG_CODES = [
385 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
386 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
387 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
388 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
389 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
390 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
391 ]
392
a057779d 393 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
394
c26f9b99 395 @functools.cached_property
396 def _preferred_lang(self):
397 """
398 Returns a language code supported by YouTube for the user preferred language.
399 Returns None if no preferred language set.
400 """
401 preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
402 if not preferred_lang:
403 return
404 if preferred_lang not in self._SUPPORTED_LANG_CODES:
405 raise ExtractorError(
406 f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
407 expected=True)
408 elif preferred_lang != 'en':
409 self.report_warning(
410 f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
411 return preferred_lang
412
cce889b9 413 def _initialize_consent(self):
414 cookies = self._get_cookies('https://www.youtube.com/')
415 if cookies.get('__Secure-3PSID'):
416 return
417 consent_id = None
418 consent = cookies.get('CONSENT')
419 if consent:
420 if 'YES' in consent.value:
421 return
422 consent_id = self._search_regex(
423 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
424 if not consent_id:
425 consent_id = random.randint(100, 999)
426 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 427
f3aa3c3f 428 def _initialize_pref(self):
429 cookies = self._get_cookies('https://www.youtube.com/')
430 pref_cookie = cookies.get('PREF')
431 pref = {}
432 if pref_cookie:
433 try:
14f25df2 434 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 435 except ValueError:
436 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
c26f9b99 437 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
14f25df2 438 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 439
b2e8bc1b 440 def _real_initialize(self):
f3aa3c3f 441 self._initialize_pref()
cce889b9 442 self._initialize_consent()
a25bca9f 443 self._check_login_required()
444
445 def _check_login_required(self):
24146491 446 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 447 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 448
b7c47b74 449 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
450 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 451
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration for ``client``."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
109dd3b2 454
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for the built-in ``client``."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return builtin_config['INNERTUBE_HOST']
109dd3b2 457
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the built-in config of ``default_client``.

    The fallback is used whenever the value from ``ytcfg`` is missing or falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
462
000c15a4 463 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 464 return self._ytcfg_get_safe(
465 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 466 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 467
000c15a4 468 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 469 return self._ytcfg_get_safe(
470 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 471 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 472
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API host.

    Precedence: the ``innertube_host`` extractor argument, then
    ``req_api_hostname``, then the built-in host of ``default_client``
    ('web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
476
000c15a4 477 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 478 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 479
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_CONTEXT from ``ytcfg`` (or the default client config).

    The context's ``client`` dict, when present, is updated in place with the
    preferred language (default 'en') and a UTC timezone.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
487
cf87314d 488 _SAPISID = None
489
109dd3b2 490 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 491 time_now = round(time.time())
cf87314d 492 if self._SAPISID is None:
493 yt_cookies = self._get_cookies('https://www.youtube.com')
494 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
495 # See: https://github.com/yt-dlp/yt-dlp/issues/393
496 sapisid_cookie = dict_get(
497 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
498 if sapisid_cookie and sapisid_cookie.value:
499 self._SAPISID = sapisid_cookie.value
500 self.write_debug('Extracted SAPISID cookie')
501 # SAPISID cookie is required if not already present
502 if not yt_cookies.get('SAPISID'):
503 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
504 self._set_cookie(
505 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
506 else:
507 self._SAPISID = False
508 if not self._SAPISID:
509 return None
1974e99f 510 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
511 sapisidhash = hashlib.sha1(
86e5f3ed 512 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 513 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
514
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """Call the ``/youtubei/v1/<ep>`` endpoint and return the downloaded JSON.

    ``query`` is merged into the JSON payload next to the context. The
    ``innertube_key`` extractor argument overrides ``api_key``, which in turn
    overrides the key of ``default_client``. Extra ``headers`` are applied on
    top of the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
           or api_key or self._extract_api_key(default_client=default_client))
    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})
f4f751af 532
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON assigned to ytInitialData and return it."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 535
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first usable SESSION_INDEX among the given ytcfgs, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    candidates = (int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
546
547 # Deprecated?
548 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 549 if ytcfg:
14f25df2 550 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
551 if token:
552 return token
99e9e001 553 if webpage:
554 return self._search_regex(
555 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
556 'identity token', default=None, fatal=False)
a1c5d2ca
M
557
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str) or ''
        channel_syncid, *remainder = datasync_id.split('||')
        if remainder and remainder[0]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return channel_syncid
    return None
a1c5d2ca 576
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    lookup_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, lookup_paths, expected_type=str)
ac56cf38 586
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
590
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

    Returns an empty dict when there is no webpage, no match, or the JSON
    cannot be parsed.
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
598
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit keyword arguments take precedence over values found in
    ``ytcfg``; client name, version and user agent fall back to the built-in
    config of ``default_client``. Authorization headers are added when a
    SAPISIDHASH can be generated. The result is passed through filter_dict.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_id),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 624
a25bca9f 625 def _download_ytcfg(self, client, video_id):
626 url = {
627 'web': 'https://www.youtube.com',
628 'web_music': 'https://music.youtube.com',
629 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
630 }.get(client)
631 if not url:
632 return {}
633 webpage = self._download_webpage(
634 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
635 return self.extract_ytcfg(video_id, webpage) or {}
636
2d6659b9 637 @staticmethod
638 def _build_api_continuation_query(continuation, ctp=None):
639 query = {
640 'continuation': continuation
641 }
642 # TODO: Inconsistency with clickTrackingParams.
643 # Currently we have a fixed ctp contained within context (from ytcfg)
644 # and a ctp in root query for continuation.
645 if ctp:
646 query['clickTracking'] = {'clickTrackingParams': ctp}
647 return query
648
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in ``renderer``.

    Returns None when the renderer has no such data or it lacks a token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 661
662 @classmethod
663 def _extract_continuation_ep_data(cls, continuation_ep: dict):
664 if isinstance(continuation_ep, dict):
665 continuation = try_get(
14f25df2 666 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 667 if not continuation:
668 return
669 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 670 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 671
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``.

    Next/reload continuation data is preferred; otherwise the first usable
    endpoint of a ``continuationItemRenderer`` found under ``contents``,
    ``items`` or ``rows`` is used.
    """
    item_endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, item_endpoint_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 682
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Entries that are not dicts, that have no ``type``, or whose ``text``
    yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed renderers instead of failing on alert.get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
695
c0ac49bc 696 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 697 errors, warnings = [], []
109dd3b2 698 for alert_type, alert_message in alerts:
641ad5d8 699 if alert_type.lower() == 'error' and fatal:
109dd3b2 700 errors.append([alert_type, alert_message])
a057779d 701 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 702 warnings.append([alert_type, alert_message])
703
704 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 705 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 706 if errors:
707 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
708
709 def _extract_and_report_alerts(self, data, *args, **kwargs):
710 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
711
def _extract_badges(self, renderer: dict):
    """Return the badges of ``renderer`` as a list of ``{'type': BadgeType}`` dicts.

    Each ``metadataBadgeRenderer`` is classified by its privacy icon, then by
    its style, and finally by matching known English words in its label.
    Badges that match none of these are left out.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type matched by the label (badge_type is empty here)
                # and stop at the first match so a badge is recorded only once
                badges.append({'type': label_badge_type})
                break

    return badges
751
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether ``badges`` contains an entry whose 'type' equals ``badge_type``."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))
755
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in ``data`` at any of ``path_list``.

    A candidate's text is its ``simpleText``, else the concatenated ``text``
    of its ``runs`` (or of the candidate itself when it is a list), limited to
    the first ``max_runs`` runs when given. Without paths, ``data`` itself is
    the only candidate. Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A non-branching path yields a single object rather than a list of results
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 777
def _get_count(self, data, *path_list):
    """Return the count described by the text at ``path_list`` in ``data``.

    The text is first given to parse_count; if that yields None, the leading
    run of digits and commas (whitespace removed) is converted with str_to_int.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)
785
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts ('url', 'height', 'width') from `data`
    @param data: object containing one or more 'thumbnails' lists
    @param path_list: paths to the level that holds the 'thumbnails' key;
                      with no paths, `data` itself is that level
    Entries without a usable URL are left out.
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL, so the query
            # string is cut off from maxresdefault URLs. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
809
f3aa3c3f 810 @staticmethod
811 def extract_relative_time(relative_time_text):
812 """
813 Extracts a relative time from string and converts to dt object
f0d785d3 814 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
f3aa3c3f 815 """
f0d785d3 816 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
f3aa3c3f 817 if mobj:
f0d785d3 818 start = mobj.group('start')
819 if start:
820 return datetime_from_str(start)
f3aa3c3f 821 try:
f0d785d3 822 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 823 except ValueError:
824 return None
825
def _parse_time_text(self, text):
    """
    Turn a human-readable time text into a timestamp
    @param text: the time text; a falsy value makes the method return None at once
    Order of attempts: relative expression (extract_relative_time), then
    unified_timestamp on the whole text, then unified_timestamp on a date
    fragment pulled out of the lowercased text. When all fail and the
    preferred language is None or 'en', a warning is reported (only once).
    """
    if not text:
        return

    timestamp = None
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    # `text` is known to be truthy here because of the guard at the top
    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
f3aa3c3f 845
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` inside a self.RetryManager() loop and return the response
    @param item_id: id passed on as video_id and used when parsing error bodies
    @param query: query passed to _call_api
    @param note: note passed to _call_api
    @param headers: headers passed to _call_api
    @param ytcfg: config from which the context and API key are extracted
    @param check_get_keys: path(s) that must resolve to something truthy in
                           the response, otherwise the data counts as incomplete
    @param ep: endpoint name
    @param fatal: passed to _error_or_warning for failures that are not retried
    @param api_hostname: passed to _call_api
    @param default_client: client used for context/key extraction and the call
    A failure is handed to the retry manager by assigning `retry.error`;
    anything else goes through _error_or_warning, whose result is returned.
    """
    for retry in self.RetryManager():
        try:
            context = self._extract_context(ytcfg, default_client)
            api_key = self._extract_api_key(ytcfg, default_client)
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=context, api_key=api_key,
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                # network failure without an HTTP response: just retry
                retry.error = err
                continue

            # Peek at the body; a non-HTML body may be JSON carrying an error message
            head = cause.read(512)
            if not is_html(head):
                body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                yt_error = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)
109dd3b2 897
9297939e 898 @staticmethod
899 def is_music_url(url):
900 return re.match(r'https?://music\.youtube\.com/', url) is not None
901
def _extract_video(self, renderer):
    """
    Build a 'url' result for YoutubeIE out of a video renderer dict
    @param renderer: renderer dict; the video id is read from its 'videoId' key
    The result links to the /shorts/ URL when the overlay style is 'SHORTS'
    or the navigation URL contains '/shorts/', and to the watch URL otherwise.
    'upload_date' is only filled in when the 'approximate_date' argument
    of the 'youtubetab' extractor is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # a missing badge is passed as None, not False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
962
0c148415 963
360e1ca5 964class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 965 IE_DESC = 'YouTube'
cb7dfeea 966 _VALID_URL = r"""(?x)^
c5e8d7af 967 (
edb53e2d 968 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 969 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
970 (?:www\.)?deturl\.com/www\.youtube\.com|
971 (?:www\.)?pwnyoutube\.com|
972 (?:www\.)?hooktube\.com|
973 (?:www\.)?yourepeat\.com|
974 tube\.majestyc\.net|
975 %(invidious)s|
976 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
977 (?:.*?\#/)? # handle anchor (#/) redirect urls
978 (?: # the various things that can precede the ID:
b6ce9bb0 979 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 980 |(?: # or the v= param in all its forms
f7000f3a 981 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 982 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 983 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
984 v=
985 )
f4b05232 986 ))
cbaed4bb
S
987 |(?:
988 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
989 vid\.plus| # or vid.plus/xxxx
990 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 991 %(invidious)s
cbaed4bb 992 )/
edb53e2d 993 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 994 )
c5e8d7af 995 )? # all until now is optional -> you can pass the naked ID
201c1459 996 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 997 (?(1).+)? # if we found the ID, everything can follow
9297939e 998 (?:\#|$)""" % {
d9190e44 999 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1000 }
7c6eb424 1001 _EMBED_REGEX = [
1002 r'''(?x)
1003 (?:
0ca0f881 1004 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1005 data-video-url=|
1006 <embed[^>]+?src=|
1007 embedSWF\(?:\s*|
1008 <object[^>]+data=|
1009 new\s+SWFObject\(
1010 )
1011 (["\'])
1012 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1013 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1014 \1''',
1015 # https://wordpress.org/plugins/lazy-load-for-videos/
1016 r'''(?xs)
1017 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1018 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1019 ]
1020
e40c758c 1021 _PLAYER_INFO_RE = (
cc2db878 1022 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1023 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1024 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1025 )
2c62dc26 1026 _formats = {
c2d3cb4c 1027 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1028 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1029 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1030 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1031 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1032 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1033 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1034 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1035 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1036 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1037 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1038 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1039 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1040 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1041 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1042 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1043 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1044 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1045
1046
1047 # 3D videos
c2d3cb4c 1048 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1049 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1050 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1051 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1052 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1053 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1054 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1055
96fb5605 1056 # Apple HTTP Live Streaming
11f12195 1057 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1058 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1059 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1060 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1061 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1062 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1063 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1064 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1065
1066 # DASH mp4 video
d23028a8
S
1067 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1068 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1069 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1070 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1071 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1072 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1073 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1074 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1075 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1076 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1077 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1078 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1079
f6f1fc92 1080 # Dash mp4 audio
d23028a8
S
1081 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1082 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1083 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1084 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1085 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1086 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1087 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1088
1089 # Dash webm
d23028a8
S
1090 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1091 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1092 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1093 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1094 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1095 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1096 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1097 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1098 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1100 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1101 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1102 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1103 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1104 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1105 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1106 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1107 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1108 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1109 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1110 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1111 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1112
1113 # Dash webm audio
d23028a8
S
1114 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1115 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1116
0857baad 1117 # Dash webm audio with opus inside
d23028a8
S
1118 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1119 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1120 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1121
ce6b9a2d
PH
1122 # RTMP (unnamed)
1123 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1124
1125 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1126 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1127 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1128 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1129 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1130 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1131 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1132 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1133 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1134 }
29f7c58a 1135 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1136
fd5c4aab
S
1137 _GEO_BYPASS = False
1138
78caa52a 1139 IE_NAME = 'youtube'
2eb88d95
PH
1140 _TESTS = [
1141 {
2d3d2997 1142 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1143 'info_dict': {
1144 'id': 'BaW_jenozKc',
1145 'ext': 'mp4',
3867038a 1146 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1147 'uploader': 'Philipp Hagemeister',
1148 'uploader_id': 'phihag',
ec85ded8 1149 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1150 'channel': 'Philipp Hagemeister',
dd4c4492
S
1151 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1152 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1153 'upload_date': '20121002',
ff9f925b 1154 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1155 'categories': ['Science & Technology'],
3867038a 1156 'tags': ['youtube-dl'],
556dbe7f 1157 'duration': 10,
dbdaaa23 1158 'view_count': int,
3e7c1224 1159 'like_count': int,
ff9f925b 1160 'availability': 'public',
1161 'playable_in_embed': True,
1162 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1163 'live_status': 'not_live',
1164 'age_limit': 0,
7c80519c 1165 'start_time': 1,
297a564b 1166 'end_time': 9,
12a1b225 1167 'comment_count': int,
6c73052c 1168 'channel_follower_count': int
2eb88d95 1169 }
0e853ca4 1170 },
fccd3771 1171 {
4bc3a23e
PH
1172 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1173 'note': 'Embed-only video (#1746)',
1174 'info_dict': {
1175 'id': 'yZIXLfi8CZQ',
1176 'ext': 'mp4',
1177 'upload_date': '20120608',
1178 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1179 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1180 'uploader': 'SET India',
94bfcd23 1181 'uploader_id': 'setindia',
ec85ded8 1182 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1183 'age_limit': 18,
545cc85d 1184 },
1185 'skip': 'Private video',
fccd3771 1186 },
11b56058 1187 {
8bdd16b4 1188 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1189 'note': 'Use the first video ID in the URL',
1190 'info_dict': {
1191 'id': 'BaW_jenozKc',
1192 'ext': 'mp4',
3867038a 1193 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1194 'uploader': 'Philipp Hagemeister',
1195 'uploader_id': 'phihag',
ec85ded8 1196 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1197 'channel': 'Philipp Hagemeister',
1198 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1199 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1200 'upload_date': '20121002',
976ae3ea 1201 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1202 'categories': ['Science & Technology'],
3867038a 1203 'tags': ['youtube-dl'],
556dbe7f 1204 'duration': 10,
dbdaaa23 1205 'view_count': int,
11b56058 1206 'like_count': int,
976ae3ea 1207 'availability': 'public',
1208 'playable_in_embed': True,
1209 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1210 'live_status': 'not_live',
1211 'age_limit': 0,
12a1b225 1212 'comment_count': int,
6c73052c 1213 'channel_follower_count': int
34a7de29
S
1214 },
1215 'params': {
1216 'skip_download': True,
1217 },
11b56058 1218 },
dd27fd17 1219 {
2d3d2997 1220 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1221 'note': '256k DASH audio (format 141) via DASH manifest',
1222 'info_dict': {
1223 'id': 'a9LDPn-MO4I',
1224 'ext': 'm4a',
1225 'upload_date': '20121002',
1226 'uploader_id': '8KVIDEO',
ec85ded8 1227 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1228 'description': '',
1229 'uploader': '8KVIDEO',
1230 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1231 },
4bc3a23e
PH
1232 'params': {
1233 'youtube_include_dash_manifest': True,
1234 'format': '141',
4919603f 1235 },
de3c7fe0 1236 'skip': 'format 141 not served anymore',
dd27fd17 1237 },
8bdd16b4 1238 # DASH manifest with encrypted signature
1239 {
1240 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1241 'info_dict': {
1242 'id': 'IB3lcPjvWLA',
1243 'ext': 'm4a',
1244 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1245 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1246 'duration': 244,
1247 'uploader': 'AfrojackVEVO',
1248 'uploader_id': 'AfrojackVEVO',
1249 'upload_date': '20131011',
cc2db878 1250 'abr': 129.495,
976ae3ea 1251 'like_count': int,
1252 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1253 'playable_in_embed': True,
1254 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1255 'view_count': int,
1256 'track': 'The Spark',
1257 'live_status': 'not_live',
1258 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1259 'channel': 'Afrojack',
1260 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1261 'tags': 'count:19',
1262 'availability': 'public',
1263 'categories': ['Music'],
1264 'age_limit': 0,
1265 'alt_title': 'The Spark',
6c73052c 1266 'channel_follower_count': int
8bdd16b4 1267 },
1268 'params': {
1269 'youtube_include_dash_manifest': True,
1270 'format': '141/bestaudio[ext=m4a]',
1271 },
1272 },
65c2fde2 1273 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1274 {
65c2fde2 1275 'note': 'Embed allowed age-gate video',
2d3d2997 1276 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1277 'info_dict': {
1278 'id': 'HtVdAasjOgU',
1279 'ext': 'mp4',
1280 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1281 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1282 'duration': 142,
c522adb1
JMF
1283 'uploader': 'The Witcher',
1284 'uploader_id': 'WitcherGame',
ec85ded8 1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1286 'upload_date': '20140605',
34952f09 1287 'age_limit': 18,
976ae3ea 1288 'categories': ['Gaming'],
1289 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1290 'availability': 'needs_auth',
1291 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1292 'like_count': int,
1293 'channel': 'The Witcher',
1294 'live_status': 'not_live',
1295 'tags': 'count:17',
1296 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1297 'playable_in_embed': True,
1298 'view_count': int,
6c73052c 1299 'channel_follower_count': int
c522adb1
JMF
1300 },
1301 },
65c2fde2 1302 {
1303 'note': 'Age-gate video with embed allowed in public site',
1304 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1305 'info_dict': {
1306 'id': 'HsUATh_Nc2U',
1307 'ext': 'mp4',
1308 'title': 'Godzilla 2 (Official Video)',
1309 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1310 'upload_date': '20200408',
1311 'uploader_id': 'FlyingKitty900',
1312 'uploader': 'FlyingKitty',
1313 'age_limit': 18,
976ae3ea 1314 'availability': 'needs_auth',
1315 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1316 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1317 'channel': 'FlyingKitty',
1318 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1319 'view_count': int,
1320 'categories': ['Entertainment'],
1321 'live_status': 'not_live',
1322 'tags': ['Flyingkitty', 'godzilla 2'],
1323 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1324 'like_count': int,
1325 'duration': 177,
1326 'playable_in_embed': True,
6c73052c 1327 'channel_follower_count': int
65c2fde2 1328 },
1329 },
1330 {
1331 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1332 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1333 'info_dict': {
1334 'id': 'Tq92D6wQ1mg',
1335 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1336 'ext': 'mp4',
17322130 1337 'upload_date': '20191228',
65c2fde2 1338 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1339 'uploader': 'Projekt Melody',
1340 'description': 'md5:17eccca93a786d51bc67646756894066',
1341 'age_limit': 18,
976ae3ea 1342 'like_count': int,
1343 'availability': 'needs_auth',
1344 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1345 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1346 'view_count': int,
1347 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1348 'channel': 'Projekt Melody',
1349 'live_status': 'not_live',
1350 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1351 'playable_in_embed': True,
1352 'categories': ['Entertainment'],
1353 'duration': 106,
1354 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1355 'comment_count': int,
6c73052c 1356 'channel_follower_count': int
65c2fde2 1357 },
1358 },
1359 {
1360 'note': 'Non-Agegated non-embeddable video',
1361 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1362 'info_dict': {
1363 'id': 'MeJVWBSsPAY',
1364 'ext': 'mp4',
1365 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1366 'uploader': 'Herr Lurik',
1367 'uploader_id': 'st3in234',
1368 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1369 'upload_date': '20130730',
976ae3ea 1370 'track': 'Such mich find mich',
1371 'age_limit': 0,
1372 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1373 'like_count': int,
1374 'playable_in_embed': False,
1375 'creator': 'OOMPH!',
1376 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1377 'view_count': int,
1378 'alt_title': 'Such mich find mich',
1379 'duration': 210,
1380 'channel': 'Herr Lurik',
1381 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1382 'categories': ['Music'],
1383 'availability': 'public',
1384 'uploader_url': 'http://www.youtube.com/user/st3in234',
1385 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1386 'live_status': 'not_live',
1387 'artist': 'OOMPH!',
6c73052c 1388 'channel_follower_count': int
65c2fde2 1389 },
1390 },
1391 {
1392 'note': 'Non-bypassable age-gated video',
1393 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1394 'only_matching': True,
1395 },
8bdd16b4 1396 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1397 # YouTube Red ad is not captured for creator
1398 {
1399 'url': '__2ABJjxzNo',
1400 'info_dict': {
1401 'id': '__2ABJjxzNo',
1402 'ext': 'mp4',
1403 'duration': 266,
1404 'upload_date': '20100430',
1405 'uploader_id': 'deadmau5',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1407 'creator': 'deadmau5',
1408 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1409 'uploader': 'deadmau5',
1410 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1411 'alt_title': 'Some Chords',
976ae3ea 1412 'availability': 'public',
1413 'tags': 'count:14',
1414 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1415 'view_count': int,
1416 'live_status': 'not_live',
1417 'channel': 'deadmau5',
1418 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1419 'like_count': int,
1420 'track': 'Some Chords',
1421 'artist': 'deadmau5',
1422 'playable_in_embed': True,
1423 'age_limit': 0,
1424 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1425 'categories': ['Music'],
1426 'album': 'Some Chords',
6c73052c 1427 'channel_follower_count': int
8bdd16b4 1428 },
1429 'expected_warnings': [
1430 'DASH manifest missing',
1431 ]
1432 },
067aa17e 1433 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1434 {
1435 'url': 'lqQg6PlCWgI',
1436 'info_dict': {
1437 'id': 'lqQg6PlCWgI',
1438 'ext': 'mp4',
556dbe7f 1439 'duration': 6085,
90227264 1440 'upload_date': '20150827',
cbe2bd91 1441 'uploader_id': 'olympic',
ec85ded8 1442 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1443 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1444 'uploader': 'Olympics',
cbe2bd91 1445 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1446 'like_count': int,
1447 'release_timestamp': 1343767800,
1448 'playable_in_embed': True,
1449 'categories': ['Sports'],
1450 'release_date': '20120731',
1451 'channel': 'Olympics',
1452 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1453 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1454 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1455 'age_limit': 0,
1456 'availability': 'public',
1457 'live_status': 'was_live',
1458 'view_count': int,
1459 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1460 'channel_follower_count': int
cbe2bd91
PH
1461 },
1462 'params': {
1463 'skip_download': 'requires avconv',
e52a40ab 1464 }
cbe2bd91 1465 },
6271f1ca
PH
1466 # Non-square pixels
1467 {
1468 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1469 'info_dict': {
1470 'id': '_b-2C3KPAM0',
1471 'ext': 'mp4',
1472 'stretched_ratio': 16 / 9.,
556dbe7f 1473 'duration': 85,
6271f1ca
PH
1474 'upload_date': '20110310',
1475 'uploader_id': 'AllenMeow',
ec85ded8 1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1477 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1478 'uploader': '孫ᄋᄅ',
6271f1ca 1479 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1480 'playable_in_embed': True,
1481 'channel': '孫ᄋᄅ',
1482 'age_limit': 0,
1483 'tags': 'count:11',
1484 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1485 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1486 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1487 'view_count': int,
1488 'categories': ['People & Blogs'],
1489 'like_count': int,
1490 'live_status': 'not_live',
1491 'availability': 'unlisted',
12a1b225 1492 'comment_count': int,
6c73052c 1493 'channel_follower_count': int
6271f1ca 1494 },
06b491eb
S
1495 },
1496 # url_encoded_fmt_stream_map is empty string
1497 {
1498 'url': 'qEJwOuvDf7I',
1499 'info_dict': {
1500 'id': 'qEJwOuvDf7I',
f57b7835 1501 'ext': 'webm',
06b491eb
S
1502 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1503 'description': '',
1504 'upload_date': '20150404',
1505 'uploader_id': 'spbelect',
1506 'uploader': 'Наблюдатели Петербурга',
1507 },
1508 'params': {
1509 'skip_download': 'requires avconv',
e323cf3f
S
1510 },
1511 'skip': 'This live event has ended.',
06b491eb 1512 },
067aa17e 1513 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1514 {
1515 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1516 'info_dict': {
1517 'id': 'FIl7x6_3R5Y',
eb6793ba 1518 'ext': 'webm',
da77d856
S
1519 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1520 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1521 'duration': 220,
da77d856
S
1522 'upload_date': '20150625',
1523 'uploader_id': 'dorappi2000',
ec85ded8 1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1525 'uploader': 'dorappi2000',
eb6793ba 1526 'formats': 'mincount:31',
da77d856 1527 },
eb6793ba 1528 'skip': 'not actual anymore',
2ee8f5d8 1529 },
8a1a26ce
YCH
1530 # DASH manifest with segment_list
1531 {
1532 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1533 'md5': '8ce563a1d667b599d21064e982ab9e31',
1534 'info_dict': {
1535 'id': 'CsmdDsKjzN8',
1536 'ext': 'mp4',
17ee98e1 1537 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1538 'uploader': 'Airtek',
1539 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1540 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1541 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1542 },
1543 'params': {
1544 'youtube_include_dash_manifest': True,
1545 'format': '135', # bestvideo
be49068d
S
1546 },
1547 'skip': 'This live event has ended.',
2ee8f5d8 1548 },
cf7e015f
S
1549 {
1550 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1551 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1552 'info_dict': {
545cc85d 1553 'id': 'jvGDaLqkpTg',
1554 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1555 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1556 },
1557 'playlist': [{
1558 'info_dict': {
545cc85d 1559 'id': 'jvGDaLqkpTg',
cf7e015f 1560 'ext': 'mp4',
545cc85d 1561 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1562 'description': 'md5:e03b909557865076822aa169218d6a5d',
1563 'duration': 10643,
1564 'upload_date': '20161111',
1565 'uploader': 'Team PGP',
1566 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1568 },
1569 }, {
1570 'info_dict': {
545cc85d 1571 'id': '3AKt1R1aDnw',
cf7e015f 1572 'ext': 'mp4',
545cc85d 1573 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1574 'description': 'md5:e03b909557865076822aa169218d6a5d',
1575 'duration': 10991,
1576 'upload_date': '20161111',
1577 'uploader': 'Team PGP',
1578 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1580 },
1581 }, {
1582 'info_dict': {
545cc85d 1583 'id': 'RtAMM00gpVc',
cf7e015f 1584 'ext': 'mp4',
545cc85d 1585 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1586 'description': 'md5:e03b909557865076822aa169218d6a5d',
1587 'duration': 10995,
1588 'upload_date': '20161111',
1589 'uploader': 'Team PGP',
1590 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1592 },
1593 }, {
1594 'info_dict': {
545cc85d 1595 'id': '6N2fdlP3C5U',
cf7e015f 1596 'ext': 'mp4',
545cc85d 1597 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1598 'description': 'md5:e03b909557865076822aa169218d6a5d',
1599 'duration': 10990,
1600 'upload_date': '20161111',
1601 'uploader': 'Team PGP',
1602 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1603 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1604 },
1605 }],
1606 'params': {
1607 'skip_download': True,
1608 },
65c2fde2 1609 'skip': 'Not multifeed anymore',
cbaed4bb 1610 },
f9f49d87 1611 {
067aa17e 1612 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1613 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1614 'info_dict': {
1615 'id': 'gVfLd0zydlo',
1616 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1617 },
1618 'playlist_count': 2,
be49068d 1619 'skip': 'Not multifeed anymore',
f9f49d87 1620 },
cbaed4bb 1621 {
2d3d2997 1622 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1623 'only_matching': True,
0e49d9a6 1624 },
6d4fc66b 1625 {
2d3d2997 1626 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1627 'only_matching': True,
1628 },
0e49d9a6 1629 {
067aa17e 1630 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1631 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1632 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1633 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1634 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1635 'info_dict': {
1636 'id': 'lsguqyKfVQg',
1637 'ext': 'mp4',
1638 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1639 'alt_title': 'Dark Walk',
0e49d9a6 1640 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1641 'duration': 133,
0e49d9a6
LL
1642 'upload_date': '20151119',
1643 'uploader_id': 'IronSoulElf',
ec85ded8 1644 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1645 'uploader': 'IronSoulElf',
11f9be09 1646 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1647 'track': 'Dark Walk',
1648 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1649 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1650 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1651 'categories': ['Film & Animation'],
1652 'view_count': int,
1653 'live_status': 'not_live',
1654 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1655 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1656 'tags': 'count:13',
1657 'availability': 'public',
1658 'channel': 'IronSoulElf',
1659 'playable_in_embed': True,
1660 'like_count': int,
1661 'age_limit': 0,
6c73052c 1662 'channel_follower_count': int
0e49d9a6
LL
1663 },
1664 'params': {
1665 'skip_download': True,
1666 },
1667 },
61f92af1 1668 {
067aa17e 1669 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1670 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1671 'only_matching': True,
1672 },
313dfc45
LL
1673 {
1674 # Video with yt:stretch=17:0
1675 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1676 'info_dict': {
1677 'id': 'Q39EVAstoRM',
1678 'ext': 'mp4',
1679 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1680 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1681 'upload_date': '20151107',
1682 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1683 'uploader': 'CH GAMER DROID',
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
be49068d 1688 'skip': 'This video does not exist.',
313dfc45 1689 },
201c1459 1690 {
1691 # Video with incomplete 'yt:stretch=16:'
1692 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1693 'only_matching': True,
1694 },
7caf9830
S
1695 {
1696 # Video licensed under Creative Commons
1697 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1698 'info_dict': {
1699 'id': 'M4gD1WSo5mA',
1700 'ext': 'mp4',
1701 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1702 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1703 'duration': 721,
17322130 1704 'upload_date': '20150128',
7caf9830 1705 'uploader_id': 'BerkmanCenter',
ec85ded8 1706 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1707 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1708 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1709 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1710 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1711 'like_count': int,
1712 'age_limit': 0,
1713 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1714 'channel': 'The Berkman Klein Center for Internet & Society',
1715 'availability': 'public',
1716 'view_count': int,
1717 'categories': ['Education'],
1718 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1719 'live_status': 'not_live',
1720 'playable_in_embed': True,
12a1b225 1721 'comment_count': int,
6c73052c 1722 'channel_follower_count': int
7caf9830
S
1723 },
1724 'params': {
1725 'skip_download': True,
1726 },
1727 },
fd050249
S
1728 {
1729 # Channel-like uploader_url
1730 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1731 'info_dict': {
1732 'id': 'eQcmzGIKrzg',
1733 'ext': 'mp4',
1734 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1735 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1736 'duration': 4060,
17322130 1737 'upload_date': '20151120',
eb6793ba 1738 'uploader': 'Bernie Sanders',
fd050249 1739 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1741 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1742 'playable_in_embed': True,
1743 'tags': 'count:12',
1744 'like_count': int,
1745 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1746 'age_limit': 0,
1747 'availability': 'public',
1748 'categories': ['News & Politics'],
1749 'channel': 'Bernie Sanders',
1750 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1751 'view_count': int,
1752 'live_status': 'not_live',
1753 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1754 'comment_count': int,
6c73052c 1755 'channel_follower_count': int
fd050249
S
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
1760 },
040ac686
S
1761 {
1762 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1763 'only_matching': True,
7f29cf54
S
1764 },
1765 {
067aa17e 1766 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1767 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1768 'only_matching': True,
6496ccb4
S
1769 },
1770 {
1771 # Rental video preview
1772 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1773 'info_dict': {
1774 'id': 'uGpuVWrhIzE',
1775 'ext': 'mp4',
1776 'title': 'Piku - Trailer',
1777 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1778 'upload_date': '20150811',
1779 'uploader': 'FlixMatrix',
1780 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1781 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1782 'license': 'Standard YouTube License',
1783 },
1784 'params': {
1785 'skip_download': True,
1786 },
eb6793ba 1787 'skip': 'This video is not available.',
022a5d66 1788 },
12afdc2a
S
1789 {
1790 # YouTube Red video with episode data
1791 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1792 'info_dict': {
1793 'id': 'iqKdEhx-dD4',
1794 'ext': 'mp4',
1795 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1796 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1797 'duration': 2085,
12afdc2a
S
1798 'upload_date': '20170118',
1799 'uploader': 'Vsauce',
1800 'uploader_id': 'Vsauce',
1801 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1802 'series': 'Mind Field',
1803 'season_number': 1,
1804 'episode_number': 1,
976ae3ea 1805 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1806 'tags': 'count:12',
1807 'view_count': int,
1808 'availability': 'public',
1809 'age_limit': 0,
1810 'channel': 'Vsauce',
1811 'episode': 'Episode 1',
1812 'categories': ['Entertainment'],
1813 'season': 'Season 1',
1814 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1815 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1816 'like_count': int,
1817 'playable_in_embed': True,
1818 'live_status': 'not_live',
6c73052c 1819 'channel_follower_count': int
12afdc2a
S
1820 },
1821 'params': {
1822 'skip_download': True,
1823 },
1824 'expected_warnings': [
1825 'Skipping DASH manifest',
1826 ],
1827 },
c7121fa7
S
1828 {
1829 # The following content has been identified by the YouTube community
1830 # as inappropriate or offensive to some audiences.
1831 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1832 'info_dict': {
1833 'id': '6SJNVb0GnPI',
1834 'ext': 'mp4',
1835 'title': 'Race Differences in Intelligence',
1836 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1837 'duration': 965,
1838 'upload_date': '20140124',
1839 'uploader': 'New Century Foundation',
1840 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1841 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1842 },
1843 'params': {
1844 'skip_download': True,
1845 },
545cc85d 1846 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1847 },
022a5d66
S
1848 {
1849 # itag 212
1850 'url': '1t24XAntNCY',
1851 'only_matching': True,
fd5c4aab
S
1852 },
1853 {
1854 # geo restricted to JP
1855 'url': 'sJL6WA-aGkQ',
1856 'only_matching': True,
1857 },
cd5a74a2
S
1858 {
1859 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1860 'only_matching': True,
1861 },
bc2ca1bb 1862 {
1863 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1864 'only_matching': True,
1865 },
1866 {
1867 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1868 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1869 'only_matching': True,
1870 },
825cd268
RA
1871 {
1872 # DRM protected
1873 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1874 'only_matching': True,
4fe54c12
S
1875 },
1876 {
1877 # Video with unsupported adaptive stream type formats
1878 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1879 'info_dict': {
1880 'id': 'Z4Vy8R84T1U',
1881 'ext': 'mp4',
1882 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1883 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1884 'duration': 433,
1885 'upload_date': '20130923',
1886 'uploader': 'Amelia Putri Harwita',
1887 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1888 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1889 'formats': 'maxcount:10',
1890 },
1891 'params': {
1892 'skip_download': True,
1893 'youtube_include_dash_manifest': False,
1894 },
5429d6a9 1895 'skip': 'not actual anymore',
5caabd3c 1896 },
1897 {
822b9d9c 1898 # YouTube Music auto-generated description
5caabd3c 1899 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1900 'info_dict': {
1901 'id': 'MgNrAu2pzNs',
1902 'ext': 'mp4',
1903 'title': 'Voyeur Girl',
1904 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1905 'upload_date': '20190312',
5429d6a9
S
1906 'uploader': 'Stephen - Topic',
1907 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1908 'artist': 'Stephen',
1909 'track': 'Voyeur Girl',
1910 'album': 'it\'s too much love to know my dear',
1911 'release_date': '20190313',
1912 'release_year': 2019,
976ae3ea 1913 'alt_title': 'Voyeur Girl',
1914 'view_count': int,
1915 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1916 'playable_in_embed': True,
1917 'like_count': int,
1918 'categories': ['Music'],
1919 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1920 'channel': 'Stephen',
1921 'availability': 'public',
1922 'creator': 'Stephen',
1923 'duration': 169,
1924 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1925 'age_limit': 0,
1926 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1927 'tags': 'count:11',
1928 'live_status': 'not_live',
6c73052c 1929 'channel_follower_count': int
5caabd3c 1930 },
1931 'params': {
1932 'skip_download': True,
1933 },
1934 },
66b48727
RA
1935 {
1936 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1937 'only_matching': True,
1938 },
011e75e6
S
1939 {
1940 # invalid -> valid video id redirection
1941 'url': 'DJztXj2GPfl',
1942 'info_dict': {
1943 'id': 'DJztXj2GPfk',
1944 'ext': 'mp4',
1945 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1946 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1947 'upload_date': '20090125',
1948 'uploader': 'Prochorowka',
1949 'uploader_id': 'Prochorowka',
1950 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1951 'artist': 'Panjabi MC',
1952 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1953 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1954 },
1955 'params': {
1956 'skip_download': True,
1957 },
545cc85d 1958 'skip': 'Video unavailable',
ea74e00b
DP
1959 },
1960 {
1961 # empty description results in an empty string
1962 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1963 'info_dict': {
1964 'id': 'x41yOUIvK2k',
1965 'ext': 'mp4',
1966 'title': 'IMG 3456',
1967 'description': '',
1968 'upload_date': '20170613',
1969 'uploader_id': 'ElevageOrVert',
1970 'uploader': 'ElevageOrVert',
976ae3ea 1971 'view_count': int,
1972 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1973 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1974 'like_count': int,
1975 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1976 'tags': [],
1977 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1978 'availability': 'public',
1979 'age_limit': 0,
1980 'categories': ['Pets & Animals'],
1981 'duration': 7,
1982 'playable_in_embed': True,
1983 'live_status': 'not_live',
1984 'channel': 'ElevageOrVert',
6c73052c 1985 'channel_follower_count': int
ea74e00b
DP
1986 },
1987 'params': {
1988 'skip_download': True,
1989 },
1990 },
a0566bbf 1991 {
29f7c58a 1992 # with '};' inside yt initial data (see [1])
1993 # see [2] for an example with '};' inside ytInitialPlayerResponse
1994 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1995 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1996 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1997 'info_dict': {
1998 'id': 'CHqg6qOn4no',
1999 'ext': 'mp4',
2000 'title': 'Part 77 Sort a list of simple types in c#',
2001 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2002 'upload_date': '20130831',
2003 'uploader_id': 'kudvenkat',
2004 'uploader': 'kudvenkat',
976ae3ea 2005 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2006 'like_count': int,
2007 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2008 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2009 'live_status': 'not_live',
2010 'categories': ['Education'],
2011 'availability': 'public',
2012 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2013 'tags': 'count:12',
2014 'playable_in_embed': True,
2015 'age_limit': 0,
2016 'view_count': int,
2017 'duration': 522,
2018 'channel': 'kudvenkat',
12a1b225 2019 'comment_count': int,
6c73052c 2020 'channel_follower_count': int
a0566bbf 2021 },
2022 'params': {
2023 'skip_download': True,
2024 },
2025 },
29f7c58a 2026 {
2027 # another example of '};' in ytInitialData
2028 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2029 'only_matching': True,
2030 },
2031 {
2032 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2033 'only_matching': True,
2034 },
545cc85d 2035 {
cc2db878 2036 # https://github.com/ytdl-org/youtube-dl/pull/28094
2037 'url': 'OtqTfy26tG0',
2038 'info_dict': {
2039 'id': 'OtqTfy26tG0',
2040 'ext': 'mp4',
2041 'title': 'Burn Out',
2042 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2043 'upload_date': '20141120',
2044 'uploader': 'The Cinematic Orchestra - Topic',
2045 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2047 'artist': 'The Cinematic Orchestra',
2048 'track': 'Burn Out',
2049 'album': 'Every Day',
976ae3ea 2050 'like_count': int,
2051 'live_status': 'not_live',
2052 'alt_title': 'Burn Out',
2053 'duration': 614,
2054 'age_limit': 0,
2055 'view_count': int,
2056 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2057 'creator': 'The Cinematic Orchestra',
2058 'channel': 'The Cinematic Orchestra',
2059 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2060 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2061 'availability': 'public',
2062 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2063 'categories': ['Music'],
2064 'playable_in_embed': True,
6c73052c 2065 'channel_follower_count': int
cc2db878 2066 },
2067 'params': {
2068 'skip_download': True,
2069 },
545cc85d 2070 },
bc2ca1bb 2071 {
2072 # controversial video, only works with bpctr when authenticated with cookies
2073 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2074 'only_matching': True,
2075 },
a1a7907b 2076 {
2077 # controversial video, requires bpctr/contentCheckOk
2078 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2079 'info_dict': {
2080 'id': 'SZJvDhaSDnc',
2081 'ext': 'mp4',
2082 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2083 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2084 'uploader': 'CBS Mornings',
11f9be09 2085 'uploader_id': 'CBSThisMorning',
a1a7907b 2086 'upload_date': '20140716',
976ae3ea 2087 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2088 'duration': 170,
2089 'categories': ['News & Politics'],
2090 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2091 'view_count': int,
2092 'channel': 'CBS Mornings',
2093 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2094 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2095 'age_limit': 18,
2096 'availability': 'needs_auth',
2097 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2098 'like_count': int,
2099 'live_status': 'not_live',
2100 'playable_in_embed': True,
6c73052c 2101 'channel_follower_count': int
a1a7907b 2102 }
2103 },
f7ad7160 2104 {
2105 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2106 'url': 'cBvYw8_A0vQ',
2107 'info_dict': {
2108 'id': 'cBvYw8_A0vQ',
2109 'ext': 'mp4',
2110 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2111 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2112 'upload_date': '20201120',
2113 'uploader': 'Walk around Japan',
2114 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2116 'duration': 1456,
2117 'categories': ['Travel & Events'],
2118 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2119 'view_count': int,
2120 'channel': 'Walk around Japan',
2121 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2122 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2123 'age_limit': 0,
2124 'availability': 'public',
2125 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2126 'live_status': 'not_live',
2127 'playable_in_embed': True,
6c73052c 2128 'channel_follower_count': int
f7ad7160 2129 },
2130 'params': {
2131 'skip_download': True,
2132 },
0fb983f6 2133 }, {
2134 # Has multiple audio streams
2135 'url': 'WaOKSUlf4TM',
2136 'only_matching': True
9297939e 2137 }, {
2138 # Requires Premium: has format 141 when requested using YTM url
2139 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2140 'only_matching': True
2141 }, {
120916da 2142 # multiple subtitles with same lang_code
2143 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2144 'only_matching': True,
109dd3b2 2145 }, {
2146 # Force use android client fallback
2147 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2148 'info_dict': {
2149 'id': 'YOelRv7fMxY',
11f9be09 2150 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2151 'ext': '3gp',
2152 'upload_date': '20210624',
2153 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2154 'uploader': 'colinfurze',
11f9be09 2155 'uploader_id': 'colinfurze',
109dd3b2 2156 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2157 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2158 'duration': 596,
2159 'categories': ['Entertainment'],
2160 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2161 'view_count': int,
2162 'channel': 'colinfurze',
2163 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2164 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2165 'age_limit': 0,
2166 'availability': 'public',
2167 'like_count': int,
2168 'live_status': 'not_live',
2169 'playable_in_embed': True,
6c73052c 2170 'channel_follower_count': int
109dd3b2 2171 },
2172 'params': {
2173 'format': '17', # 3gp format available on android
2174 'extractor_args': {'youtube': {'player_client': ['android']}},
2175 },
120916da 2176 },
109dd3b2 2177 {
2178 # Skip download of additional client configs (remix client config in this case)
2179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2180 'only_matching': True,
2181 'params': {
2182 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2183 },
8fc54b12 2184 }, {
2185 # shorts
2186 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2187 'only_matching': True,
9222c381 2188 }, {
2189 'note': 'Storyboards',
2190 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2191 'info_dict': {
2192 'id': '5KLPxDtMqe8',
2193 'ext': 'mhtml',
2194 'format_id': 'sb0',
2195 'title': 'Your Brain is Plastic',
2196 'uploader_id': 'scishow',
2197 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2198 'upload_date': '20140324',
2199 'uploader': 'SciShow',
976ae3ea 2200 'like_count': int,
2201 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2202 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2203 'view_count': int,
2204 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2205 'playable_in_embed': True,
2206 'tags': 'count:12',
2207 'uploader_url': 'http://www.youtube.com/user/scishow',
2208 'availability': 'public',
2209 'channel': 'SciShow',
2210 'live_status': 'not_live',
2211 'duration': 248,
2212 'categories': ['Education'],
2213 'age_limit': 0,
6c73052c 2214 'channel_follower_count': int
9222c381 2215 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2216 }, {
2217 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2218 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2219 'info_dict': {
2220 'id': '2NUZ8W2llS4',
2221 'ext': 'mp4',
2222 'title': 'The NP that test your phone performance 🙂',
2223 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2224 'uploader': 'Leon Nguyen',
2225 'uploader_id': 'VNSXIII',
2226 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2227 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2228 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2229 'duration': 21,
2230 'view_count': int,
2231 'age_limit': 0,
2232 'categories': ['Gaming'],
2233 'tags': 'count:23',
2234 'playable_in_embed': True,
2235 'live_status': 'not_live',
2236 'upload_date': '20220103',
2237 'like_count': int,
2238 'availability': 'public',
2239 'channel': 'Leon Nguyen',
2240 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2241 'comment_count': int,
992f9a73 2242 'channel_follower_count': int
2243 }
1ff88b7a 2244 }, {
2245 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2246 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2247 'info_dict': {
2248 'id': '2NUZ8W2llS4',
2249 'ext': 'mp4',
2250 'title': 'The NP that test your phone performance 🙂',
2251 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2252 'uploader': 'Leon Nguyen',
2253 'uploader_id': 'VNSXIII',
2254 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2255 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2256 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2257 'duration': 21,
2258 'view_count': int,
2259 'age_limit': 0,
2260 'categories': ['Gaming'],
2261 'tags': 'count:23',
2262 'playable_in_embed': True,
2263 'live_status': 'not_live',
2264 'upload_date': '20220102',
2265 'like_count': int,
2266 'availability': 'public',
2267 'channel': 'Leon Nguyen',
2268 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2269 'comment_count': int,
2270 'channel_follower_count': int
2271 },
2272 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2273 }, {
2274 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2275 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2276 'info_dict': {
2277 'id': 'mzZzzBU6lrM',
2278 'ext': 'mp4',
2279 'title': 'I Met GeorgeNotFound In Real Life...',
2280 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2281 'uploader': 'Quackity',
2282 'uploader_id': 'QuackityHQ',
2283 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2284 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2285 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2286 'duration': 955,
2287 'view_count': int,
2288 'age_limit': 0,
2289 'categories': ['Entertainment'],
2290 'tags': 'count:26',
2291 'playable_in_embed': True,
2292 'live_status': 'not_live',
2293 'release_timestamp': 1641172509,
2294 'release_date': '20220103',
2295 'upload_date': '20220103',
2296 'like_count': int,
2297 'availability': 'public',
2298 'channel': 'Quackity',
2299 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2300 'channel_follower_count': int
2301 }
2302 },
2303 { # continuous livestream. Microformat upload date should be preferred.
2304 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2305 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2306 'info_dict': {
2307 'id': 'kgx4WGK0oNU',
2308 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2309 'ext': 'mp4',
2310 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2311 'availability': 'public',
2312 'age_limit': 0,
2313 'release_timestamp': 1637975704,
2314 'upload_date': '20210619',
2315 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2316 'live_status': 'is_live',
2317 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2318 'uploader': '阿鲍Abao',
2319 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2320 'channel': 'Abao in Tokyo',
2321 'channel_follower_count': int,
2322 'release_date': '20211127',
2323 'tags': 'count:39',
2324 'categories': ['People & Blogs'],
2325 'like_count': int,
2326 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2327 'view_count': int,
2328 'playable_in_embed': True,
2329 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2330 },
2331 'params': {'skip_download': True}
6e634cbe 2332 }, {
2333 # Story. Requires specific player params to work.
ee27297f 2334 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2335 'info_dict': {
ee27297f 2336 'id': 'vv8qTUWmulI',
6e634cbe 2337 'ext': 'mp4',
ee27297f 2338 'availability': 'unlisted',
2339 'view_count': int,
2340 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2341 'upload_date': '20220526',
2342 'categories': ['Education'],
2343 'title': 'Story',
2344 'channel': 'IT\'S HISTORY',
2345 'description': '',
2346 'uploader_id': 'BlastfromthePast',
2347 'duration': 12,
2348 'uploader': 'IT\'S HISTORY',
6e634cbe 2349 'playable_in_embed': True,
6e634cbe 2350 'age_limit': 0,
6e634cbe 2351 'live_status': 'not_live',
ee27297f 2352 'tags': [],
2353 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2354 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2355 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2356 },
2357 'skip': 'stories get removed after some period of time',
ee27297f 2358 }, {
2359 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2360 'info_dict': {
2361 'id': 'tjjjtzRLHvA',
2362 'ext': 'mp4',
2363 'title': 'ハッシュタグ無し };if window.ytcsi',
2364 'upload_date': '20220323',
2365 'like_count': int,
2366 'availability': 'unlisted',
2367 'channel': 'nao20010128nao',
2368 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2369 'age_limit': 0,
2370 'uploader': 'nao20010128nao',
2371 'uploader_id': 'nao20010128nao',
2372 'categories': ['Music'],
6e634cbe 2373 'view_count': int,
2374 'description': '',
ee27297f 2375 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2376 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2377 'live_status': 'not_live',
2378 'playable_in_embed': True,
2379 'channel_follower_count': int,
2380 'duration': 6,
2381 'tags': [],
2382 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2383 }
c26f9b99 2384 }, {
2385 # Prefer primary title+description language metadata by default
2386 # Do not prefer translated description if primary is empty
2387 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2388 'info_dict': {
2389 'id': 'el3E4MbxRqQ',
2390 'ext': 'mp4',
2391 'title': 'dlp test video 2 - primary sv no desc',
2392 'description': '',
2393 'channel': 'cole-dlp-test-acc',
2394 'tags': [],
2395 'view_count': int,
2396 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2397 'like_count': int,
2398 'playable_in_embed': True,
2399 'availability': 'unlisted',
2400 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2401 'age_limit': 0,
2402 'duration': 5,
2403 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2404 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2405 'live_status': 'not_live',
2406 'upload_date': '20220908',
2407 'categories': ['People & Blogs'],
2408 'uploader': 'cole-dlp-test-acc',
2409 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2410 },
2411 'params': {'skip_download': True}
2412 }, {
2413 # Extractor argument: prefer translated title+description
2414 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2415 'info_dict': {
2416 'id': 'gHKT4uU8Zng',
2417 'ext': 'mp4',
2418 'channel': 'cole-dlp-test-acc',
2419 'tags': [],
2420 'duration': 5,
2421 'live_status': 'not_live',
2422 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2423 'upload_date': '20220728',
2424 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2425 'view_count': int,
2426 'categories': ['People & Blogs'],
2427 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2428 'title': 'dlp test video title translated (fr)',
2429 'availability': 'public',
2430 'uploader': 'cole-dlp-test-acc',
2431 'age_limit': 0,
2432 'description': 'dlp test video description translated (fr)',
2433 'playable_in_embed': True,
2434 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2435 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2436 },
2437 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2438 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2439 }, {
2440 'note': '6 channel audio',
2441 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2442 'only_matching': True,
6e634cbe 2443 }
2eb88d95
PH
2444 ]
2445
# Test definitions for YouTube embeds located on third-party webpages.
# NOTE(review): presumably consumed by the project's extractor test runner - confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the description md5 below,
        # which looks like a copy-paste rather than a real file checksum - confirm
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2481
@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty `list` query parameter.

    Any other URL is handed to the parent class' `suitable` check.
    """
    # Kept as a function-local import, exactly like the original
    from ..utils import parse_qs

    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super().suitable(url)
201c1459 2490
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _player_cache: filled by _cached(); _code_cache: filled by _load_player()
    self._player_cache, self._code_cache = {}, {}
e0df6211 2495
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the 'is_from_start' formats into lazily generated DASH fragment formats.

    Every format dict in `formats` whose 'is_from_start' value is truthy is
    mutated in place: it is flagged as live, its protocol is set to
    'http_dash_segments_generator' and its 'fragments' become a partial of
    `_live_dash_fragments` bound to that format's id, `live_start_time` and
    the `mpd_feed` closure defined below.
    """
    # Serialises manifest refetching between callers of mpd_feed
    lock = threading.Lock()

    is_live = True
    # Time of the last (re)fetch; refetch_manifest compares against it
    start_time = time.time()
    # Rebinds the local name only; the dicts themselves are shared with the caller
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-downloads the player responses and replaces formats/is_live,
        # but only once more than `delay` seconds passed since the last fetch.
        # `format_id` is accepted but not used here.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the format up in the (possibly just refreshed) list
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        # The remaining argument (ctx) is supplied by whoever calls the partial
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2539
2540 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2541 FETCH_SPAN, MAX_DURATION = 5, 432000
2542
2543 mpd_url, stream_number, is_live = None, None, True
2544
2545 begin_index = 0
2546 download_start_time = ctx.get('start') or time.time()
2547
2548 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2549 if lack_early_segments:
2550 self.report_warning(bug_reports_message(
2551 'Starting download from the last 120 hours of the live stream since '
2552 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2553 lack_early_segments = True
2554
2555 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2556 fragments, fragment_base_url = None, None
2557
a539f065 2558 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2559 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2560 # Obtain from MPD's maximum seq value
2561 old_mpd_url = mpd_url
185bf310 2562 last_error = ctx.pop('last_error', None)
14f25df2 2563 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2564 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2565 or (mpd_url, stream_number, False))
2566 if not refresh_sequence:
2567 if expire_fast and not is_live:
2568 return False, last_seq
2569 elif old_mpd_url == mpd_url:
2570 return True, last_seq
adbc4ec4
THD
2571 try:
2572 fmts, _ = self._extract_mpd_formats_and_subtitles(
2573 mpd_url, None, note=False, errnote=False, fatal=False)
2574 except ExtractorError:
2575 fmts = None
2576 if not fmts:
a539f065 2577 no_fragment_score += 2
adbc4ec4
THD
2578 return False, last_seq
2579 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2580 fragments = fmt_info['fragments']
2581 fragment_base_url = fmt_info['fragment_base_url']
2582 assert fragment_base_url
2583
2584 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2585 return True, _last_seq
2586
2587 while is_live:
2588 fetch_time = time.time()
2589 if no_fragment_score > 30:
2590 return
2591 if last_segment_url:
2592 # Obtain from "X-Head-Seqnum" header value from each segment
2593 try:
2594 urlh = self._request_webpage(
2595 last_segment_url, None, note=False, errnote=False, fatal=False)
2596 except ExtractorError:
2597 urlh = None
2598 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2599 if last_seq is None:
a539f065 2600 no_fragment_score += 2
adbc4ec4
THD
2601 last_segment_url = None
2602 continue
2603 else:
a539f065
LNO
2604 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2605 no_fragment_score += 2
185bf310 2606 if not should_continue:
adbc4ec4
THD
2607 continue
2608
2609 if known_idx > last_seq:
2610 last_segment_url = None
2611 continue
2612
2613 last_seq += 1
2614
2615 if begin_index < 0 and known_idx < 0:
2616 # skip from the start when it's negative value
2617 known_idx = last_seq + begin_index
2618 if lack_early_segments:
2619 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2620 try:
2621 for idx in range(known_idx, last_seq):
2622 # do not update sequence here or you'll get skipped some part of it
a539f065 2623 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2624 if not should_continue:
adbc4ec4
THD
2625 known_idx = idx - 1
2626 raise ExtractorError('breaking out of outer loop')
2627 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2628 yield {
2629 'url': last_segment_url,
36195c44 2630 'fragment_count': last_seq,
adbc4ec4
THD
2631 }
2632 if known_idx == last_seq:
2633 no_fragment_score += 5
2634 else:
2635 no_fragment_score = 0
2636 known_idx = last_seq
2637 except ExtractorError:
2638 continue
2639
2640 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2641
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of `ytcfgs`, or None.

    `webpage` is accepted but not used.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None
109dd3b2 2649
b6de707d 2650 def _download_player_url(self, video_id, fatal=False):
2651 res = self._download_webpage(
2652 'https://www.youtube.com/iframe_api',
2653 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2654 if res:
2655 player_version = self._search_regex(
2656 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2657 if player_version:
2658 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2659
60064c53
PH
2660 def _signature_cache_id(self, example_sig):
2661 """ Return a string representation of a signature """
14f25df2 2662 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2663
e40c758c
S
2664 @classmethod
2665 def _extract_player_info(cls, player_url):
2666 for player_re in cls._PLAYER_INFO_RE:
2667 id_m = re.search(player_re, player_url)
2668 if id_m:
2669 break
2670 else:
c081b35c 2671 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2672 return id_m.group('id')
e40c758c 2673
404f611f 2674 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2675 player_id = self._extract_player_info(player_url)
2676 if player_id not in self._code_cache:
1276a43a 2677 code = self._download_webpage(
109dd3b2 2678 player_url, video_id, fatal=fatal,
2679 note='Downloading player ' + player_id,
2680 errnote='Download of %s failed' % player_url)
1276a43a 2681 if code:
2682 self._code_cache[player_id] = code
404f611f 2683 return self._code_cache.get(player_id)
109dd3b2 2684
e40c758c 2685 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2686 player_id = self._extract_player_info(player_url)
e0df6211 2687
c4417ddb 2688 # Read from filesystem cache
86e5f3ed 2689 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2690 assert os.path.basename(func_id) == func_id
a0e07d31 2691
ae61d108 2692 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2693 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2694
580ce007 2695 if not cache_spec:
2696 code = self._load_player(video_id, player_url)
404f611f 2697 if code:
109dd3b2 2698 res = self._parse_sig_js(code)
ac668111 2699 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2700 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2701 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2702
2703 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2704
60064c53 2705 def _print_sig_code(self, func, example_sig):
404f611f 2706 if not self.get_param('youtube_print_sig_code'):
2707 return
2708
edf3e38e
PH
2709 def gen_sig_code(idxs):
2710 def _genslice(start, end, step):
78caa52a 2711 starts = '' if start == 0 else str(start)
8bcc8756 2712 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2713 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2714 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2715
2716 step = None
7af808a5
PH
2717 # Quelch pyflakes warnings - start will be set when step is set
2718 start = '(Never used)'
edf3e38e
PH
2719 for i, prev in zip(idxs[1:], idxs[:-1]):
2720 if step is not None:
2721 if i - prev == step:
2722 continue
2723 yield _genslice(start, prev, step)
2724 step = None
2725 continue
2726 if i - prev in [-1, 1]:
2727 step = i - prev
2728 start = prev
2729 continue
2730 else:
78caa52a 2731 yield 's[%d]' % prev
edf3e38e 2732 if step is None:
78caa52a 2733 yield 's[%d]' % i
edf3e38e
PH
2734 else:
2735 yield _genslice(start, i, step)
2736
ac668111 2737 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2738 cache_res = func(test_string)
edf3e38e 2739 cache_spec = [ord(c) for c in cache_res]
78caa52a 2740 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2741 signature_id_tuple = '(%s)' % (
14f25df2 2742 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2743 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2744 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2745 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2746
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a Python callable.

    The returned callable takes the signature string and passes it to the
    extracted JS function as its only argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    js_sig_func = JSInterpreter(jscode).extract_function(sig_func_name)

    def run_sig_func(s):
        return js_sig_func([s])

    return run_sig_func
2770
580ce007 2771 def _cached(self, func, *cache_id):
2772 def inner(*args, **kwargs):
2773 if cache_id not in self._player_cache:
2774 try:
2775 self._player_cache[cache_id] = func(*args, **kwargs)
2776 except ExtractorError as e:
2777 self._player_cache[cache_id] = e
2778 except Exception as e:
2779 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2780
2781 ret = self._player_cache[cache_id]
2782 if isinstance(ret, Exception):
2783 raise ret
2784 return ret
2785 return inner
2786
545cc85d 2787 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2788 """Turn the encrypted s field into a working signature"""
580ce007 2789 extract_sig = self._cached(
2790 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2791 func = extract_sig(video_id, player_url, s)
2792 self._print_sig_code(func, s)
2793 return func(s)
404f611f 2794
2795 def _decrypt_nsig(self, s, video_id, player_url):
2796 """Turn the encrypted n field into a working signature"""
2797 if player_url is None:
2798 raise ExtractorError('Cannot decrypt nsig without player_url')
60f393e4 2799 player_url = urljoin('https://www.youtube.com', player_url)
404f611f 2800
b505e851 2801 try:
2802 jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
2803 except ExtractorError as e:
2804 raise ExtractorError('Unable to extract nsig function code', cause=e)
580ce007 2805 if self.get_param('youtube_print_sig_code'):
2806 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
404f611f 2807
25836db6 2808 try:
2809 extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
2810 ret = extract_nsig(jsi, func_code)(s)
2811 except JSInterpreter.Exception as e:
2812 try:
992dc6b4 2813 jsi = PhantomJSwrapper(self, timeout=5000)
25836db6 2814 except ExtractorError:
2815 raise e
2816 self.report_warning(
2817 f'Native nsig extraction failed: Trying with PhantomJS\n'
2818 f' n = {s} ; player = {player_url}', video_id)
2819 self.write_debug(e)
2820
2821 args, func_body = func_code
2822 ret = jsi.execute(
2823 f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
2824 video_id=video_id, note='Executing signature code').strip()
580ce007 2825
2826 self.write_debug(f'Decrypted nsig {s} => {ret}')
2827 return ret
2828
90a1df30 2829 def _extract_n_function_name(self, jscode):
2830 funcname, idx = self._search_regex(
2831 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2832 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2833 if not idx:
2834 return funcname
2835
2836 return json.loads(js_to_json(self._search_regex(
2837 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2838 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2839
def _extract_n_function_code(self, video_id, player_url):
    """Return (jsi, player_id, func_code) for the player's nsig function.

    `func_code` is taken from the 'youtube-nsig' cache when present; otherwise
    it is extracted from the player JS (by regex first, with the JS
    interpreter as fallback) and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # With a cache hit the player is not downloaded; the interpreter is built from the cached value
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name may contain "$" (see the name regex in _extract_n_function_name),
    # which must be escaped to be matched literally
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to (list of argument names, function body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2865
2866 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2867 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2868
580ce007 2869 def extract_nsig(s):
25836db6 2870 try:
2871 ret = func([s])
2872 except JSInterpreter.Exception:
2873 raise
2874 except Exception as e:
2875 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2876
f6ca640b 2877 if ret.startswith('enhanced_except_'):
25836db6 2878 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2879 return ret
580ce007 2880
2881 return extract_nsig
e0df6211 2882
109dd3b2 2883 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2884 """
2885 Extract signatureTimestamp (sts)
2886 Required to tell API what sig/player version is in use.
2887 """
2888 sts = None
2889 if isinstance(ytcfg, dict):
2890 sts = int_or_none(ytcfg.get('STS'))
2891
2892 if not sts:
2893 # Attempt to extract from player
2894 if player_url is None:
2895 error_msg = 'Cannot extract signature timestamp without player_url.'
2896 if fatal:
2897 raise ExtractorError(error_msg)
2898 self.report_warning(error_msg)
2899 return
404f611f 2900 code = self._load_player(video_id, player_url, fatal=fatal)
2901 if code:
109dd3b2 2902 sts = int_or_none(self._search_regex(
2903 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2904 'JS player signature timestamp', group='sts', fatal=fatal))
2905 return sts
2906
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs found in the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # is_full is 0 for the playback URL and 1 for the watchtime URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2947
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for the YouTube videos referenced by `webpage`.

    An "alternate" link to a YouTube watch URL is yielded first and then ends
    the extraction by raising `cls.StopExtraction`.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for importer_embed in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the id; groups 1 and 2 are the quote characters
        embedded_id = importer_embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 2971
@classmethod
def extract_id(cls, url):
    """Return the id `get_temp_id` finds in `url`; raise ExtractorError when it finds none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2978
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2993
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else []."""
    panels = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def marker_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def marker_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in panels:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, marker_time, marker_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3006
1890fc63 3007 def _extract_chapters_from_description(self, description, duration):
3008 return self._extract_chapters(
3009 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
3010 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
3011 duration=duration, strict=False)
84213ea8 3012
1890fc63 3013 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3014 if not duration:
3015 return
3016 chapter_list = [{
3017 'start_time': chapter_time(chapter),
3018 'title': chapter_title(chapter),
3019 } for chapter in chapter_list or []]
3020 if not strict:
3021 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3022
a3976e07 3023 chapters = [{'start_time': 0}]
1890fc63 3024 for idx, chapter in enumerate(chapter_list):
a3976e07 3025 if chapter['start_time'] is None:
1890fc63 3026 self.report_warning(f'Incomplete chapter {idx}')
3027 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 3028 chapters.append(chapter)
709ee214 3029 elif chapter not in chapters:
3030 self.report_warning(
3031 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
a3976e07 3032 return chapters[1:]
84213ea8 3033
a1c5d2ca
M
3034 def _extract_comment(self, comment_renderer, parent=None):
3035 comment_id = comment_renderer.get('commentId')
3036 if not comment_id:
3037 return
fe93e2c4 3038
052e1350 3039 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 3040
c26f9b99 3041 # Timestamp is an estimate calculated from the current time and time_text
3042 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3043 timestamp = self._parse_time_text(time_text)
3044
052e1350 3045 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 3046 author_id = try_get(comment_renderer,
14f25df2 3047 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 3048
49bd8c66 3049 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 3050 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 3051 author_thumbnail = try_get(comment_renderer,
14f25df2 3052 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
3053
3054 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 3055 is_favorited = 'creatorHeart' in (try_get(
3056 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
3057 return {
3058 'id': comment_id,
3059 'text': text,
d92f5d5a 3060 'timestamp': timestamp,
a1c5d2ca
M
3061 'time_text': time_text,
3062 'like_count': votes,
97524332 3063 'is_favorited': is_favorited,
a1c5d2ca
M
3064 'author': author,
3065 'author_id': author_id,
3066 'author_thumbnail': author_thumbnail,
3067 'author_is_uploader': author_is_uploader,
3068 'parent': parent or 'root'
3069 }
3070
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts, following continuations page by page.

    Calls itself recursively for the replies of each comment, passing the
    comment id as `parent` and sharing the same `tracker` dict so that the
    counters cover the whole extraction.

    root_continuation_data -- renderer the first continuation is extracted from
    ytcfg                  -- passed on to header generation and API requests
    video_id               -- used to build a continuation when none is found,
                              and for the final warning
    parent                 -- id of the parent comment; None for the top level
    tracker                -- dict of running counters; created on the first call
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation of the
        # requested sort order (or None when no item provides one)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its (limited) replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare `yield` produces None; the consuming loop below
                # returns as soon as it receives a falsy entry
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Take at most the per-thread limit, and never more than what
                # is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The `max_comments` extractor argument is padded with empty strings so that
    # four values can always be unpacked; int_or_none is given sys.maxsize as
    # the default for each of them
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the very first request made with a generated continuation skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only used for the header; comments are
                # read from the sort continuation it provides
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None sentinel from extract_thread: parent limit reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # If nothing was extracted at the top level, relay the message found in the root data (if any)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3216
3217 @staticmethod
3218 def _generate_comment_continuation(video_id):
3219 """
3220 Generates initial comment section continuation token from given video id
3221 """
3222 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3223 return base64.b64encode(token.encode()).decode()
3224
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the comment item section
    found in `contents`, truncated to the first value of the `max_comments`
    extractor argument.
    """
    def iter_section_comments():
        section = None
        for candidate in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_section_comments(), 0, limit)
a1c5d2ca 3235
109dd3b2 3236 @staticmethod
99e9e001 3237 def _get_checkok_params():
3238 return {'contentCheckOk': True, 'racyCheckOk': True}
3239
3240 @classmethod
3241 def _generate_player_context(cls, sts=None):
109dd3b2 3242 context = {
3243 'html5Preference': 'HTML5_PREF_WANTS',
3244 }
3245 if sts is not None:
3246 context['signatureTimestamp'] = sts
3247 return {
3248 'playbackContext': {
3249 'contentPlaybackContext': context
a1a7907b 3250 },
99e9e001 3251 **cls._get_checkok_params()
109dd3b2 3252 }
3253
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    True when the playability status carries a `desktopLegacyAgeGateReason`,
    or when its `status` or `reason` contains one of the known age-gate
    markers. The comparison is case-insensitive; values that are not strings
    are ignored.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status codes are compared in upper-case
    # elsewhere (e.g. 'UNPLAYABLE'), so normalise the case before matching
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
3265
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3269
# Sent as `params` in player API queries for stories and for the android client,
# and as the `pp` query parameter when downloading the webpage of a story
_STORY_PLAYER_PARAMS = '8AEB'
3271
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the `player` endpoint for `video_id` on behalf of `client`.

    The signature timestamp is only looked up when `player_url` is given.
    Stories and the android client additionally send `_STORY_PLAYER_PARAMS`.
    Returns the response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    signature_timestamp = None
    if player_url:
        signature_timestamp = self._extract_signature_timestamp(
            video_id, player_url, master_ytcfg, fatal=False)

    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=account_syncid,
        session_index=session_index, default_client=client)

    needs_story_params = (
        smuggled_data.get('is_story')
        or _split_innertube_client(client)[0] == 'android')
    yt_query = {'videoId': video_id}
    if needs_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(signature_timestamp))

    progress_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client, note=progress_note)
    return response or None
3293
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered client list.

    Each requested value is either an allowed client name (any key of
    INNERTUBE_CLIENTS not starting with `_`), 'default', or 'all'; anything
    else is skipped with a warning. With no usable request the defaults are
    used. For music URLs the `<client>_music` variant of every selected
    client is appended when such a client exists. Duplicates are removed via
    `orderedSet`.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so the list is not extended while it is
        # being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3317
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each requested client.

    Clients are tried in the order given. Further clients may be queued while
    processing, depending on the response that was received (see the end of
    the loop). Returns a tuple `(prs, player_url)`: the list of player
    responses and the player URL that was last in effect.

    If requests failed, the last error is raised when no player response was
    collected at all; otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() from the end yields the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL has been found it is reused for the following clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The separate player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when there is one
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the one it replaces is reported as a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3399
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts, followed by one subtitles dict.

    First the `formats`/`adaptiveFormats` entries of every item in
    `streaming_data` are converted into format dicts. Then the HLS and DASH
    manifests of each item are extracted (unless skipped) and their formats
    are yielded. The LAST item yielded is always the merged subtitles dict
    collected from the manifests, so consumers must split it off.

    streaming_data -- list of streamingData dicts
    video_id       -- used for messages and manifest/signature extraction
    player_url     -- player used to decrypt signatures and `n` parameters
    is_live        -- affects which manifests are fetched
    duration       -- used to detect formats with a suspiciously short duration
    """
    # itags maps a format id to the protocol it was found with;
    # stream_ids lists the "<itag>.<audio track id>" pairs already handled
    itags, stream_ids = {}, []
    # Qualities seen per itag / per height, used later for manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher, which needs the player
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and deprioritized below
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for "descriptive" tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Assigns format id and quality to a manifest format. Returns False
        # when the same itag was already seen with this protocol.
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # The itag is known from another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # Always the final item of this generator
    yield subtitles
11f9be09 3595
720c3099 3596 def _extract_storyboard(self, player_responses, duration):
3597 spec = get_first(
3598 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
596379e2 3599 base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
3600 if not base_url:
720c3099 3601 return
720c3099 3602 L = len(spec) - 1
3603 for i, args in enumerate(spec):
3604 args = args.split('#')
3605 counts = list(map(int_or_none, args[:5]))
3606 if len(args) != 8 or not all(counts):
3607 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
3608 continue
3609 width, height, frame_count, cols, rows = counts
3610 N, sigh = args[6:]
3611
3612 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
3613 fragment_count = frame_count / (cols * rows)
3614 fragment_duration = duration / fragment_count
3615 yield {
3616 'format_id': f'sb{i}',
3617 'format_note': 'storyboard',
3618 'ext': 'mhtml',
3619 'protocol': 'mhtml',
3620 'acodec': 'none',
3621 'vcodec': 'none',
3622 'url': url,
3623 'width': width,
3624 'height': height,
45e8a04e 3625 'fps': frame_count / duration,
3626 'rows': rows,
3627 'columns': cols,
720c3099 3628 'fragments': [{
b3edc806 3629 'url': url.replace('$M', str(j)),
720c3099 3630 'duration': min(fragment_duration, duration - (j * fragment_duration)),
3631 } for j in range(math.ceil(fragment_count))],
3632 }
3633
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns the tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` is None when 'webpage' is listed in the `player_skip` extractor
    argument; the default ytcfg is used when none can be extracted.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')

    webpage = None
    if not skip_webpage:
        page_query = dict(bpctr='9999999999', has_verified='1')
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3650
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract all formats and subtitles.

    Returns the tuple
    `(live_broadcast_details, is_live, streaming_data, formats, subtitles)`.
    `is_live` comes from the video details, falling back to the live
    broadcast details when the former does not provide it.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])

    # The extractor generator yields every format and, as its final item, the subtitles
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles
adbc4ec4
THD
3661
3662 def _real_extract(self, url):
3663 url, smuggled_data = unsmuggle_url(url, {})
3664 video_id = self._match_id(url)
3665
3666 base_url = self.http_scheme() + '//www.youtube.com/'
3667 webpage_url = base_url + 'watch?v=' + video_id
3668
3669 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3670
11f9be09 3671 playability_statuses = traverse_obj(
3672 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3673
3674 trailer_video_id = get_first(
3675 playability_statuses,
3676 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3677 expected_type=str)
3678 if trailer_video_id:
3679 return self.url_result(
3680 trailer_video_id, self.ie_key(), trailer_video_id)
3681
3682 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3683 if webpage else (lambda x: None))
3684
3685 video_details = traverse_obj(
3686 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3687 microformats = traverse_obj(
3688 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3689 expected_type=dict, default=[])
c26f9b99 3690
3691 translated_title = self._get_text(microformats, (..., 'title'))
3692 video_title = (self._preferred_lang and translated_title
3693 or get_first(video_details, 'title') # primary
3694 or translated_title
3695 or search_meta(['og:title', 'twitter:title', 'title']))
3696 translated_description = self._get_text(microformats, (..., 'description'))
3697 original_description = get_first(video_details, 'shortDescription')
3698 video_description = (
3699 self._preferred_lang and translated_description
3700 # If original description is blank, it will be an empty string.
3701 # Do not prefer translated description in this case.
3702 or original_description if original_description is not None else translated_description)
11f9be09 3703
d89257f3 3704 multifeed_metadata_list = get_first(
3705 player_responses,
3706 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3707 expected_type=str)
3708 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3709 if self.get_param('noplaylist'):
11f9be09 3710 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3711 else:
3712 entries = []
3713 feed_ids = []
3714 for feed in multifeed_metadata_list.split(','):
3715 # Unquote should take place before split on comma (,) since textual
3716 # fields may contain comma as well (see
3717 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3718 feed_data = urllib.parse.parse_qs(
ac668111 3719 urllib.parse.unquote_plus(feed))
d89257f3 3720
3721 def feed_entry(name):
3722 return try_get(
14f25df2 3723 feed_data, lambda x: x[name][0], str)
d89257f3 3724
3725 feed_id = feed_entry('id')
3726 if not feed_id:
3727 continue
3728 feed_title = feed_entry('title')
3729 title = video_title
3730 if feed_title:
3731 title += ' (%s)' % feed_title
3732 entries.append({
3733 '_type': 'url_transparent',
3734 'ie_key': 'Youtube',
3735 'url': smuggle_url(
3736 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3737 {'force_singlefeed': True}),
3738 'title': title,
3739 })
3740 feed_ids.append(feed_id)
3741 self.to_screen(
3742 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3743 % (', '.join(feed_ids), video_id))
3744 return self.playlist_result(
3745 entries, video_id, video_title, video_description)
11f9be09 3746
a1b2d843 3747 duration = int_or_none(
3748 get_first(video_details, 'lengthSeconds')
3749 or get_first(microformats, 'lengthSeconds')
3750 or parse_duration(search_meta('duration'))) or None
3751
c646d76f 3752 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3753 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3754
545cc85d 3755 if not formats:
11f9be09 3756 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3757 self.report_drm(video_id)
11f9be09 3758 pemr = get_first(
3759 playability_statuses,
3760 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3761 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3762 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3763 if subreason:
545cc85d 3764 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3765 countries = get_first(microformats, 'availableCountries')
545cc85d 3766 if not countries:
3767 regions_allowed = search_meta('regionsAllowed')
3768 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3769 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3770 reason += f'. {subreason}'
545cc85d 3771 if reason:
b7da73eb 3772 self.raise_no_formats(reason, expected=True)
bf1317d2 3773
11f9be09 3774 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3775 if not keywords and webpage:
3776 keywords = [
3777 unescapeHTML(m.group('content'))
3778 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3779 for keyword in keywords:
3780 if keyword.startswith('yt:stretch='):
201c1459 3781 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3782 if mobj:
3783 # NB: float is intentional for forcing float division
3784 w, h = (float(v) for v in mobj.groups())
3785 if w > 0 and h > 0:
3786 ratio = w / h
3787 for f in formats:
3788 if f.get('vcodec') != 'none':
3789 f['stretched_ratio'] = ratio
3790 break
a709d873 3791 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3792 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3793 if thumbnail_url:
3794 thumbnails.append({
3795 'url': thumbnail_url,
ff2751ac 3796 })
fccf5021 3797 original_thumbnails = thumbnails.copy()
3798
0ba692ac 3799 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3800 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3801 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3802 thumbnail_names = [
962ffcf8 3803 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3804 # in resolution, these are not the custom thumbnail. So de-prioritize them
3805 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3806 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3807 ]
cca80fe6 3808 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3809 thumbnails.extend({
3810 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3811 video_id=video_id, name=name, ext=ext,
3812 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3813 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3814 for thumb in thumbnails:
cca80fe6 3815 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3816 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3817 self._remove_duplicate_formats(thumbnails)
fccf5021 3818 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3819
7ea65411 3820 category = get_first(microformats, 'category') or search_meta('genre')
3821 channel_id = str_or_none(
3822 get_first(video_details, 'channelId')
3823 or get_first(microformats, 'externalChannelId')
3824 or search_meta('channelId'))
7ea65411 3825 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3826
3827 live_content = get_first(video_details, 'isLiveContent')
3828 is_upcoming = get_first(video_details, 'isUpcoming')
3829 if is_live is None:
3830 if is_upcoming or live_content is False:
3831 is_live = False
3832 if is_upcoming is None and (live_content or is_live):
3833 is_upcoming = False
adbc4ec4
THD
3834 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3835 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3836 if not duration and live_end_time and live_start_time:
3837 duration = live_end_time - live_start_time
3838
3839 if is_live and self.get_param('live_from_start'):
3840 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3841
720c3099 3842 formats.extend(self._extract_storyboard(player_responses, duration))
3843
31b532a1 3844 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3845 self._sort_formats(formats, (
3846 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3847
545cc85d 3848 info = {
3849 'id': video_id,
39ca3b5c 3850 'title': video_title,
545cc85d 3851 'formats': formats,
3852 'thumbnails': thumbnails,
fccf5021 3853 # The best thumbnail that we are sure exists. Prevents unnecessary
3854 # URL checking if user don't care about getting the best possible thumbnail
3855 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3856 'description': video_description,
11f9be09 3857 'uploader': get_first(video_details, 'author'),
545cc85d 3858 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3859 'uploader_url': owner_profile_url,
3860 'channel_id': channel_id,
a70635b8 3861 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3862 'duration': duration,
3863 'view_count': int_or_none(
11f9be09 3864 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3865 or search_meta('interactionCount')),
11f9be09 3866 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3867 'age_limit': 18 if (
11f9be09 3868 get_first(microformats, 'isFamilySafe') is False
545cc85d 3869 or search_meta('isFamilyFriendly') == 'false'
3870 or search_meta('og:restrictions:age') == '18+') else 0,
3871 'webpage_url': webpage_url,
3872 'categories': [category] if category else None,
3873 'tags': keywords,
11f9be09 3874 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3875 'is_live': is_live,
3876 'was_live': (False if is_live or is_upcoming or live_content is False
3877 else None if is_live is None or is_upcoming is None
3878 else live_content),
3879 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3880 'release_timestamp': live_start_time,
545cc85d 3881 }
b477fc13 3882
e325a21a 3883 if get_first(video_details, 'isPostLiveDvr'):
3884 self.write_debug('Video is in Post-Live Manifestless mode')
3885 info['live_status'] = 'post_live'
3886 if (duration or 0) > 4 * 3600:
3887 self.report_warning(
3888 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3889 'This is a known issue and patches are welcome')
3890
c646d76f 3891 subtitles = {}
3944e7af 3892 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3893 if pctr:
ecdc9049 3894 def get_lang_code(track):
3895 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3896 or track.get('languageCode'))
3897
3898 # Converted into dicts to remove duplicates
3899 captions = {
3900 get_lang_code(sub): sub
3901 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3902 translation_languages = {
3903 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3904 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3905
774d79cc 3906 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3907 lang_subs = container.setdefault(lang_code, [])
545cc85d 3908 for fmt in self._SUBTITLE_FORMATS:
3909 query.update({
3910 'fmt': fmt,
3911 })
3912 lang_subs.append({
3913 'ext': fmt,
60f393e4 3914 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3915 'name': sub_name,
545cc85d 3916 })
7e72694b 3917
07b47084 3918 # NB: Constructing the full subtitle dictionary is slow
3919 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3920 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3921 for lang_code, caption_track in captions.items():
3922 base_url = caption_track.get('baseUrl')
1235d333 3923 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3924 if not base_url:
3925 continue
ecdc9049 3926 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3927 if caption_track.get('kind') != 'asr':
545cc85d 3928 if not lang_code:
3929 continue
3930 process_language(
ecdc9049 3931 subtitles, base_url, lang_code, lang_name, {})
3932 if not caption_track.get('isTranslatable'):
3933 continue
3944e7af 3934 for trans_code, trans_name in translation_languages.items():
3935 if not trans_code:
545cc85d 3936 continue
1235d333 3937 orig_trans_code = trans_code
ecdc9049 3938 if caption_track.get('kind') != 'asr':
07b47084 3939 if not get_translated_subs:
18e49408 3940 continue
ecdc9049 3941 trans_code += f'-{lang_code}'
a70635b8 3942 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3943 # Add an "-orig" label to the original language so that it can be distinguished.
3944 # The subs are returned without "-orig" as well for compatibility
1235d333 3945 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3946 process_language(
d49669ac 3947 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3948 # Setting tlang=lang returns damaged subtitles.
d49669ac 3949 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3950 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3951
3952 info['automatic_captions'] = automatic_captions
3953 info['subtitles'] = subtitles
7e72694b 3954
14f25df2 3955 parsed_url = urllib.parse.urlparse(url)
545cc85d 3956 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3957 query = urllib.parse.parse_qs(component)
545cc85d 3958 for k, v in query.items():
3959 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3960 d_k += '_time'
3961 if d_k not in info and k in s_ks:
3962 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3963
3964 # Youtube Music Auto-generated description
822b9d9c 3965 if video_description:
1890fc63 3966 mobj = re.search(
3967 r'''(?xs)
3968 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3969 (?P<album>[^\n]+)
3970 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3971 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3972 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3973 .+\nAuto-generated\ by\ YouTube\.\s*$
3974 ''', video_description)
822b9d9c 3975 if mobj:
822b9d9c
RA
3976 release_year = mobj.group('release_year')
3977 release_date = mobj.group('release_date')
3978 if release_date:
3979 release_date = release_date.replace('-', '')
3980 if not release_year:
545cc85d 3981 release_year = release_date[:4]
3982 info.update({
3983 'album': mobj.group('album'.strip()),
3984 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3985 'track': mobj.group('track').strip(),
3986 'release_date': release_date,
cc2db878 3987 'release_year': int_or_none(release_year),
545cc85d 3988 })
7e72694b 3989
545cc85d 3990 initial_data = None
3991 if webpage:
56ba69e4 3992 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3993 if not initial_data:
99e9e001 3994 query = {'videoId': video_id}
3995 query.update(self._get_checkok_params())
109dd3b2 3996 initial_data = self._extract_response(
3997 item_id=video_id, ep='next', fatal=False,
99e9e001 3998 ytcfg=master_ytcfg, query=query,
3999 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 4000 note='Downloading initial data API JSON')
545cc85d 4001
0df111a3 4002 info['comment_count'] = traverse_obj(initial_data, (
4003 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4004 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4005 ), (
4006 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4007 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4008 ), expected_type=int_or_none, get_all=False)
4009
19a03940 4010 try: # This will error if there is no livechat
c60ee3a2 4011 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 4012 except (KeyError, IndexError, TypeError):
4013 pass
4014 else:
ecdc9049 4015 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 4016 # url is needed to set cookies
4017 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 4018 'video_id': video_id,
4019 'ext': 'json',
f6745c49 4020 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 4021 }]
545cc85d 4022
4023 if initial_data:
7c365c21 4024 info['chapters'] = (
4025 self._extract_chapters_from_json(initial_data, duration)
4026 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 4027 or self._extract_chapters_from_description(video_description, duration)
7c365c21 4028 or None)
545cc85d 4029
17322130 4030 contents = traverse_obj(
4031 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4032 expected_type=list, default=[])
4033
4034 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4035 if vpir:
4036 stl = vpir.get('superTitleLink')
4037 if stl:
4038 stl = self._get_text(stl)
4039 if try_get(
4040 vpir,
4041 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4042 info['location'] = stl
4043 else:
affc4fef 4044 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 4045 if mobj:
545cc85d 4046 info.update({
17322130 4047 'series': mobj.group(1),
4048 'season_number': int(mobj.group(2)),
4049 'episode_number': int(mobj.group(3)),
545cc85d 4050 })
17322130 4051 for tlb in (try_get(
4052 vpir,
4053 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4054 list) or []):
3ffb2f5b 4055 tbrs = variadic(
4056 traverse_obj(
4057 tlb, 'toggleButtonRenderer',
4058 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
4059 default=[]))
4060 for tbr in tbrs:
4061 for getter, regex in [(
4062 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4063 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4064 lambda x: x['accessibility'],
4065 lambda x: x['accessibilityData']['accessibilityData'],
4066 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4067 label = (try_get(tbr, getter, dict) or {}).get('label')
4068 if label:
4069 mobj = re.match(regex, label)
4070 if mobj:
4071 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4072 break
17322130 4073 sbr_tooltip = try_get(
4074 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4075 if sbr_tooltip:
4076 like_count, dislike_count = sbr_tooltip.split(' / ')
4077 info.update({
4078 'like_count': str_to_int(like_count),
4079 'dislike_count': str_to_int(dislike_count),
4080 })
4081 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4082 if vsir:
4083 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4084 info.update({
4085 'channel': self._get_text(vor, 'title'),
4086 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4087
4088 rows = try_get(
4089 vsir,
4090 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4091 list) or []
4092 multiple_songs = False
4093 for row in rows:
4094 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4095 multiple_songs = True
4096 break
4097 for row in rows:
4098 mrr = row.get('metadataRowRenderer') or {}
4099 mrr_title = mrr.get('title')
4100 if not mrr_title:
4101 continue
4102 mrr_title = self._get_text(mrr, 'title')
4103 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4104 if mrr_title == 'License':
4105 info['license'] = mrr_contents_text
4106 elif not multiple_songs:
4107 if mrr_title == 'Album':
4108 info['album'] = mrr_contents_text
4109 elif mrr_title == 'Artist':
4110 info['artist'] = mrr_contents_text
4111 elif mrr_title == 'Song':
4112 info['track'] = mrr_contents_text
545cc85d 4113
4114 fallbacks = {
4115 'channel': 'uploader',
4116 'channel_id': 'uploader_id',
4117 'channel_url': 'uploader_url',
4118 }
992f9a73 4119
17322130 4120 # The upload date for scheduled, live and past live streams / premieres in microformats
4121 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 4122 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 4123 upload_date = (
4124 unified_strdate(get_first(microformats, 'uploadDate'))
4125 or unified_strdate(search_meta('uploadDate')))
1ff88b7a 4126 if not upload_date or (
4127 not info.get('is_live')
4128 and not info.get('was_live')
4129 and info.get('live_status') != 'is_upcoming'
4130 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4131 ):
c26f9b99 4132 upload_date = strftime_or_none(
4133 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
17322130 4134 info['upload_date'] = upload_date
992f9a73 4135
545cc85d 4136 for to, frm in fallbacks.items():
4137 if not info.get(to):
4138 info[to] = info.get(frm)
4139
4140 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4141 v = info.get(s_k)
4142 if v:
4143 info[d_k] = v
b84071c0 4144
c26f9b99 4145 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4146
4147 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4148 or get_first(video_details, 'isPrivate', expected_type=bool))
4149
4150 info['availability'] = (
4151 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4152 else self._availability(
4153 is_private=is_private,
4154 needs_premium=(
4155 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4156 or False if initial_data and is_private is not None else None),
4157 needs_subscription=(
4158 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4159 or False if initial_data and is_private is not None else None),
4160 needs_auth=info['age_limit'] >= 18,
4161 is_unlisted=None if is_private is None else (
4162 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4163 or get_first(microformats, 'isUnlisted', expected_type=bool))))
c224251a 4164
a2160aa4 4165 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4166
11f9be09 4167 self.mark_watched(video_id, player_responses)
d77ab8e2 4168
545cc85d 4169 return info
c5e8d7af 4170
a61fd4cf 4171
a6213a49 4172class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 4173
182bda88 4174 @staticmethod
4175 def passthrough_smuggled_data(func):
4176 def _smuggle(entries, smuggled_data):
4177 for entry in entries:
4178 # TODO: Convert URL to music.youtube instead.
4179 # Do we need to passthrough any other smuggled_data?
4180 entry['url'] = smuggle_url(entry['url'], smuggled_data)
4181 yield entry
4182
4183 @functools.wraps(func)
4184 def wrapper(self, url):
4185 url, smuggled_data = unsmuggle_url(url, {})
4186 if self.is_music_url(url):
4187 smuggled_data['is_music_url'] = True
4188 info_dict = func(self, url, smuggled_data)
4189 if smuggled_data and info_dict.get('entries'):
4190 info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
4191 return info_dict
4192 return wrapper
4193
a6213a49 4194 def _extract_channel_id(self, webpage):
4195 channel_id = self._html_search_meta(
4196 'channelId', webpage, 'channel id', default=None)
4197 if channel_id:
4198 return channel_id
4199 channel_url = self._html_search_meta(
4200 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4201 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4202 'twitter:app:url:googleplay'), webpage, 'channel url')
4203 return self._search_regex(
4204 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4205 channel_url, 'channel id')
15f6397c 4206
8bdd16b4 4207 @staticmethod
cd7c66cf 4208 def _extract_basic_item_renderer(item):
4209 # Modified from _extract_grid_item_renderer
201c1459 4210 known_basic_renderers = (
a17526e4 4211 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4212 )
4213 for key, renderer in item.items():
201c1459 4214 if not isinstance(renderer, dict):
cd7c66cf 4215 continue
201c1459 4216 elif key in known_basic_renderers:
4217 return renderer
4218 elif key.startswith('grid') and key.endswith('Renderer'):
4219 return renderer
8bdd16b4 4220
8bdd16b4 4221 def _grid_entries(self, grid_renderer):
4222 for item in grid_renderer['items']:
4223 if not isinstance(item, dict):
39b62db1 4224 continue
cd7c66cf 4225 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4226 if not isinstance(renderer, dict):
4227 continue
052e1350 4228 title = self._get_text(renderer, 'title')
fe93e2c4 4229
8bdd16b4 4230 # playlist
4231 playlist_id = renderer.get('playlistId')
4232 if playlist_id:
4233 yield self.url_result(
4234 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4235 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4236 video_title=title)
201c1459 4237 continue
8bdd16b4 4238 # video
4239 video_id = renderer.get('videoId')
4240 if video_id:
4241 yield self._extract_video(renderer)
201c1459 4242 continue
8bdd16b4 4243 # channel
4244 channel_id = renderer.get('channelId')
4245 if channel_id:
8bdd16b4 4246 yield self.url_result(
4247 'https://www.youtube.com/channel/%s' % channel_id,
4248 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 4249 continue
4250 # generic endpoint URL support
4251 ep_url = urljoin('https://www.youtube.com/', try_get(
4252 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4253 str))
201c1459 4254 if ep_url:
4255 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4256 if ie.suitable(ep_url):
4257 yield self.url_result(
4258 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4259 break
8bdd16b4 4260
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a list item *renderer*.

    Tried in order: the item's own video, the playlist of its watch
    endpoint (together with that endpoint's video when there is one), and
    its browse endpoint. Returns None when none of them is available.
    """
    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4278
3d3dddc9 4279 def _shelf_entries_from_content(self, shelf_renderer):
4280 content = shelf_renderer.get('content')
4281 if not isinstance(content, dict):
8bdd16b4 4282 return
cd7c66cf 4283 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4284 if renderer:
4285 # TODO: add support for nested playlists so each shelf is processed
4286 # as separate playlist
4287 # TODO: this includes only first N items
86e5f3ed 4288 yield from self._grid_entries(renderer)
3d3dddc9 4289 renderer = content.get('horizontalListRenderer')
4290 if renderer:
4291 # TODO
4292 pass
8bdd16b4 4293
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the shelf's own URL result, then the entries of its content.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    # Links to other channels are skipped on request. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4309
8bdd16b4 4310 def _playlist_entries(self, video_list_renderer):
4311 for content in video_list_renderer['contents']:
4312 if not isinstance(content, dict):
4313 continue
4314 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4315 if not isinstance(renderer, dict):
4316 continue
4317 video_id = renderer.get('videoId')
4318 if not video_id:
4319 continue
4320 yield self._extract_video(renderer)
07aeced6 4321
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it has a video ID.

    The renderer is looked up under ``content`` as either ``videoRenderer``
    or ``reelItemRenderer``; nothing is yielded when no ``videoId`` is found.
    """
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4329
8bdd16b4 4330 def _video_entry(self, video_renderer):
4331 video_id = video_renderer.get('videoId')
4332 if video_id:
4333 return self._extract_video(video_renderer)
dacb3a86 4334
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap target, or None if it has no URL.

    The result is titled with the text of the renderer's ``hashtag`` field.
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4341
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage post.

    In order: the attached video, the attached playlist, and every video
    linked from the post text, except a link to the attached video itself.
    Nothing is yielded when the post has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4377
8bdd16b4 4378 def _post_thread_continuation_entries(self, post_thread_continuation):
4379 contents = post_thread_continuation.get('contents')
4380 if not isinstance(contents, list):
4381 return
4382 for content in contents:
4383 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4384 if isinstance(renderer, dict):
4385 yield from self._post_thread_entries(renderer)
8bdd16b4 4386 continue
6b0b0a28 4387 renderer = content.get('videoRenderer')
4388 if isinstance(renderer, dict):
4389 yield self._video_entry(renderer)
07aeced6 4390
39ed931e 4391 r''' # unused
4392 def _rich_grid_entries(self, contents):
4393 for content in contents:
4394 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4395 if video_renderer:
4396 entry = self._video_entry(video_renderer)
4397 if entry:
4398 yield entry
4399 '''
52efa4b3 4400
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every URL linked from the table's text cells."""
    # Path from the table renderer down to the web URL of each text run
    linked_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for linked_url in traverse_obj(renderer, linked_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', linked_url), YoutubeIE)
4407
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries of *parent_renderer* and report its continuation.

    *continuation_list* is an output parameter: it is reset in place to
    ``[None]`` and its single slot is then filled with the continuation
    extracted from the most specific renderer that provides one, falling
    back to the section renderer and finally to *parent_renderer*.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps the key of an item inside a section to the callable producing its entries
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if is_renderer:
            for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
                if not isinstance(isr_content, dict):
                    continue
                # Only the first known renderer of each item is processed
                match = next(
                    ((key, value) for key, value in isr_content.items() if key in known_renderers),
                    None)
                if match is None:
                    continue
                key, renderer = match
                # Falsy entries (e.g. None from the single-entry helpers) are dropped
                yield from filter(None, known_renderers[key](renderer))
                continuation_list[0] = self._extract_continuation(renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)
        elif content.get('richItemRenderer'):
            yield from self._rich_entries(content['richItemRenderer'])
            continuation_list[0] = self._extract_continuation(parent_renderer)
        elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
            table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
            yield from self._report_history_entries(table)
            continuation_list[0] = self._extract_continuation(table)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4460
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, requesting further pages while a continuation is available.

    The first batch comes from ``tab['content']``; every following batch is
    fetched through ``_extract_response`` using the current continuation as
    the query. Iteration stops when there is no continuation, no response,
    or no recognised renderer in the response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # ``continuation_list`` is looked up at call time, so this always
        # passes the list most recently bound in the loop below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key of the first continuation item -> (entry generator, key to wrap the items under)
    # A wrapping key of None means the continuation items are passed through unchanged
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        handled_renderer = None
        for key in first_item:
            known = known_renderers.get(key)
            if known is None:
                continue
            func, parent_key = known
            handled_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list so that a continuation left over from an earlier page is not reused
            continuation_list = [None]
            yield from func(handled_renderer)
            continuation = continuation_list[0] or self._extract_continuation(handled_renderer)

        if not handled_renderer:
            break
9558dcec 4523
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the first tab renderer in ``tabs`` whose ``selected`` value is ``True``.

    If none is found, raise ExtractorError when ``fatal`` is set; otherwise
    return None implicitly.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
b82f815f 4533
def _extract_uploader(self, data):
    """Return uploader name/id/url taken from the secondary playlist sidebar renderer.

    The result is passed through ``filter_dict``; it is built from an empty
    dict when the sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return filter_dict({})

    def browse_endpoint_value(key):
        return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], str)

    owner_text = owner.get('text')
    return filter_dict({
        # Text of the form "by X and N others" is reduced to "X"; anything else is used verbatim
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': browse_endpoint_value('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', browse_endpoint_value('canonicalBaseUrl')),
    })
8bdd16b4 4549
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab in ``tabs``.

    Metadata is read from the channel metadata renderer when present and from
    the playlist metadata renderer otherwise. Entries are produced lazily by
    ``_entries``.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Suffix the title with the tab title and, for expandable tabs, the expanded text
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 4641
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, paging through the 'next' endpoint.

    Each page is parsed with ``_playlist_entries``. Videos up to and including
    the last one already yielded are skipped, and iteration ends when a page
    is empty or contains nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to an empty dict
        # so that the per-field defaults in the query below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4672
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for ``playlist``: either a redirect to its tab URL or an inline playlist."""
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4695
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response

    Three sources are consulted: the badges of the primary sidebar renderer,
    the ``privacy`` field of the playlist header renderer, and the icon of
    the selected entry in the privacy dropdown.
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    player_header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = traverse_obj(
        renderer, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _has_state(badge_type, state):
        # A matching badge is decisive on its own. The previous one-line
        # expression mixed `or` with a conditional expression, which binds
        # looser than `or`, so the badge was ignored whenever the header
        # privacy was missing.
        if self._has_badge(badges, badge_type):
            return True
        if player_header_privacy is not None:
            return player_header_privacy == state
        if privacy_setting_icon is not None:
            return privacy_setting_icon == f'PRIVACY_{state}'
        return None  # unknown

    return self._availability(
        is_private=_has_state(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE'),
        is_unlisted=_has_state(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 4733
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the playlist sidebar items, else None."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4742
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when there is no primary sidebar renderer. If the button is
    not found, the request is still made using the default params and a
    browse id derived from ``item_id``.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4778
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the ``skip`` configuration argument of YoutubeTabIE."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4782
def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and parse its initial data, retrying via ``RetryManager``.

    Returns ``(webpage, data)``; either may still be None if no attempt got
    that far.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Retry network errors, except HTTP 403 and 429
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4810
a25bca9f 4811 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4812 """Use if failed to extract ytcfg (and data) from initial webpage"""
4813 if not ytcfg and self.is_authenticated:
4814 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4815 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4816 raise ExtractorError(
4817 f'{msg}. If you are not downloading private content, or '
4818 'your cookies are only for the first account and channel,'
4819 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4820 expected=True)
4821 self.report_warning(msg, only_once=True)
4822
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The webpage is tried first unless ``skip_webpage`` is set. If that leaves
    no data, the result of ``_extract_tab_endpoint`` is used instead.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4842
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and fetch the response of the endpoint it maps to.

    If the URL resolves to neither a browse nor a watch endpoint, raise
    ExtractorError when ``fatal`` is set, else emit a warning and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query), tried in this order
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4860
# Default 'params' value for search requests; _search_results() uses it when
# no explicit params argument is passed
_SEARCH_PARAMS = None
4862
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the search results for ``query``, page by page.

    ``params`` defaults to ``self._SEARCH_PARAMS`` and is only sent when
    truthy. Paging continues for as long as ``_extract_entries`` reports a
    continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation from the previous page (if any) into the request
        request_data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        section_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries(
            {'contents': list(variadic(section_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4895
4896
4897class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4898 IE_DESC = 'YouTube Tabs'
4899 _VALID_URL = r'''(?x:
4900 https?://
4901 (?:\w+\.)?
4902 (?:
4903 youtube(?:kids)?\.com|
4904 %(invidious)s
4905 )/
4906 (?:
4907 (?P<channel_type>channel|c|user|browse)/|
4908 (?P<not_channel>
4909 feed/|hashtag/|
4910 (?:playlist|watch)\?.*?\blist=
4911 )|
4912 (?!(?:%(reserved_names)s)\b) # Direct URLs
4913 )
4914 (?P<id>[^/?\#&]+)
4915 )''' % {
4916 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4917 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4918 }
4919 IE_NAME = 'youtube:tab'
4920
4921 _TESTS = [{
4922 'note': 'playlists, multipage',
4923 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4924 'playlist_mincount': 94,
4925 'info_dict': {
4926 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4927 'title': 'Igor Kleiner - Playlists',
a6213a49 4928 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4929 'uploader': 'Igor Kleiner',
a6213a49 4930 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4931 'channel': 'Igor Kleiner',
4932 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4933 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4934 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4935 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4936 'channel_follower_count': int
a6213a49 4937 },
4938 }, {
4939 'note': 'playlists, multipage, different order',
4940 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4941 'playlist_mincount': 94,
4942 'info_dict': {
4943 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4944 'title': 'Igor Kleiner - Playlists',
a6213a49 4945 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4946 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4947 'uploader': 'Igor Kleiner',
4948 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4949 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4950 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4951 'channel': 'Igor Kleiner',
4952 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4953 'channel_follower_count': int
a6213a49 4954 },
4955 }, {
4956 'note': 'playlists, series',
4957 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4958 'playlist_mincount': 5,
4959 'info_dict': {
4960 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4961 'title': '3Blue1Brown - Playlists',
4962 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4963 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4964 'uploader': '3Blue1Brown',
976ae3ea 4965 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4966 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4967 'channel': '3Blue1Brown',
4968 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4969 'tags': ['Mathematics'],
6c73052c 4970 'channel_follower_count': int
a6213a49 4971 },
4972 }, {
4973 'note': 'playlists, singlepage',
4974 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4975 'playlist_mincount': 4,
4976 'info_dict': {
4977 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4978 'title': 'ThirstForScience - Playlists',
4979 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4980 'uploader': 'ThirstForScience',
4981 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4982 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4983 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4984 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4985 'tags': 'count:13',
4986 'channel': 'ThirstForScience',
6c73052c 4987 'channel_follower_count': int
a6213a49 4988 }
4989 }, {
4990 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4991 'only_matching': True,
4992 }, {
4993 'note': 'basic, single video playlist',
4994 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4995 'info_dict': {
4996 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4997 'uploader': 'Sergey M.',
4998 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4999 'title': 'youtube-dl public playlist',
976ae3ea 5000 'description': '',
5001 'tags': [],
5002 'view_count': int,
5003 'modified_date': '20201130',
5004 'channel': 'Sergey M.',
5005 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5006 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5007 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5008 'availability': 'public',
a6213a49 5009 },
5010 'playlist_count': 1,
5011 }, {
5012 'note': 'empty playlist',
5013 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5014 'info_dict': {
5015 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5016 'uploader': 'Sergey M.',
5017 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5018 'title': 'youtube-dl empty playlist',
976ae3ea 5019 'tags': [],
5020 'channel': 'Sergey M.',
5021 'description': '',
5022 'modified_date': '20160902',
5023 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5024 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5025 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5026 'availability': 'public',
a6213a49 5027 },
5028 'playlist_count': 0,
5029 }, {
5030 'note': 'Home tab',
5031 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5032 'info_dict': {
5033 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5034 'title': 'lex will - Home',
5035 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5036 'uploader': 'lex will',
5037 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5038 'channel': 'lex will',
5039 'tags': ['bible', 'history', 'prophesy'],
5040 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5041 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5042 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5043 'channel_follower_count': int
a6213a49 5044 },
5045 'playlist_mincount': 2,
5046 }, {
5047 'note': 'Videos tab',
5048 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5049 'info_dict': {
5050 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5051 'title': 'lex will - Videos',
5052 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5053 'uploader': 'lex will',
5054 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5055 'tags': ['bible', 'history', 'prophesy'],
5056 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5057 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5058 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5059 'channel': 'lex will',
6c73052c 5060 'channel_follower_count': int
a6213a49 5061 },
5062 'playlist_mincount': 975,
5063 }, {
5064 'note': 'Videos tab, sorted by popular',
5065 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5066 'info_dict': {
5067 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5068 'title': 'lex will - Videos',
5069 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5070 'uploader': 'lex will',
5071 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5072 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5073 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5074 'channel': 'lex will',
5075 'tags': ['bible', 'history', 'prophesy'],
5076 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5077 'channel_follower_count': int
a6213a49 5078 },
5079 'playlist_mincount': 199,
5080 }, {
5081 'note': 'Playlists tab',
5082 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5083 'info_dict': {
5084 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5085 'title': 'lex will - Playlists',
5086 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5087 'uploader': 'lex will',
5088 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5089 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5090 'channel': 'lex will',
5091 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5092 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5093 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5094 'channel_follower_count': int
a6213a49 5095 },
5096 'playlist_mincount': 17,
5097 }, {
5098 'note': 'Community tab',
5099 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5100 'info_dict': {
5101 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5102 'title': 'lex will - Community',
5103 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5104 'uploader': 'lex will',
5105 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5106 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5107 'channel': 'lex will',
5108 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5109 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5110 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5111 'channel_follower_count': int
a6213a49 5112 },
5113 'playlist_mincount': 18,
5114 }, {
5115 'note': 'Channels tab',
5116 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5117 'info_dict': {
5118 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5119 'title': 'lex will - Channels',
5120 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5121 'uploader': 'lex will',
5122 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5123 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5124 'channel': 'lex will',
5125 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5126 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5127 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5128 'channel_follower_count': int
a6213a49 5129 },
5130 'playlist_mincount': 12,
5131 }, {
5132 'note': 'Search tab',
5133 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5134 'playlist_mincount': 40,
5135 'info_dict': {
5136 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5137 'title': '3Blue1Brown - Search - linear algebra',
5138 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5139 'uploader': '3Blue1Brown',
5140 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5141 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5142 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5143 'tags': ['Mathematics'],
5144 'channel': '3Blue1Brown',
5145 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 5146 'channel_follower_count': int
a6213a49 5147 },
5148 }, {
5149 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5150 'only_matching': True,
5151 }, {
5152 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5153 'only_matching': True,
5154 }, {
5155 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5156 'only_matching': True,
5157 }, {
5158 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5159 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5160 'info_dict': {
5161 'title': '29C3: Not my department',
5162 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5163 'uploader': 'Christiaan008',
5164 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5165 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5166 'tags': [],
5167 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5168 'view_count': int,
5169 'modified_date': '20150605',
5170 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5171 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5172 'channel': 'Christiaan008',
c26f9b99 5173 'availability': 'public',
a6213a49 5174 },
5175 'playlist_count': 96,
5176 }, {
5177 'note': 'Large playlist',
5178 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5179 'info_dict': {
5180 'title': 'Uploads from Cauchemar',
5181 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5182 'uploader': 'Cauchemar',
5183 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5184 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5185 'tags': [],
5186 'modified_date': r're:\d{8}',
5187 'channel': 'Cauchemar',
5188 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5189 'view_count': int,
5190 'description': '',
5191 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5192 'availability': 'public',
a6213a49 5193 },
5194 'playlist_mincount': 1123,
976ae3ea 5195 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5196 }, {
5197 'note': 'even larger playlist, 8832 videos',
5198 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5199 'only_matching': True,
5200 }, {
5201 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5202 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5203 'info_dict': {
5204 'title': 'Uploads from Interstellar Movie',
5205 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5206 'uploader': 'Interstellar Movie',
5207 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5208 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5209 'tags': [],
5210 'view_count': int,
5211 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5212 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5213 'channel': 'Interstellar Movie',
5214 'description': '',
5215 'modified_date': r're:\d{8}',
c26f9b99 5216 'availability': 'public',
a6213a49 5217 },
5218 'playlist_mincount': 21,
5219 }, {
5220 'note': 'Playlist with "show unavailable videos" button',
5221 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5222 'info_dict': {
5223 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5224 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5225 'uploader': 'Phim Siêu Nhân Nhật Bản',
5226 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5227 'view_count': int,
5228 'channel': 'Phim Siêu Nhân Nhật Bản',
5229 'tags': [],
5230 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5231 'description': '',
5232 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5233 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5234 'modified_date': r're:\d{8}',
c26f9b99 5235 'availability': 'public',
a6213a49 5236 },
5237 'playlist_mincount': 200,
976ae3ea 5238 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5239 }, {
5240 'note': 'Playlist with unavailable videos in page 7',
5241 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5242 'info_dict': {
5243 'title': 'Uploads from BlankTV',
5244 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5245 'uploader': 'BlankTV',
5246 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5247 'channel': 'BlankTV',
5248 'channel_url': 'https://www.youtube.com/c/blanktv',
5249 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5250 'view_count': int,
5251 'tags': [],
5252 'uploader_url': 'https://www.youtube.com/c/blanktv',
5253 'modified_date': r're:\d{8}',
5254 'description': '',
c26f9b99 5255 'availability': 'public',
a6213a49 5256 },
5257 'playlist_mincount': 1000,
976ae3ea 5258 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5259 }, {
5260 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5261 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5262 'info_dict': {
5263 'title': 'Data Analysis with Dr Mike Pound',
5264 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5265 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5266 'uploader': 'Computerphile',
5267 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5268 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5269 'tags': [],
5270 'view_count': int,
5271 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5272 'channel_url': 'https://www.youtube.com/user/Computerphile',
5273 'channel': 'Computerphile',
c26f9b99 5274 'availability': 'public',
a6213a49 5275 },
5276 'playlist_mincount': 11,
5277 }, {
5278 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5279 'only_matching': True,
5280 }, {
5281 'note': 'Playlist URL that does not actually serve a playlist',
5282 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5283 'info_dict': {
5284 'id': 'FqZTN594JQw',
5285 'ext': 'webm',
5286 'title': "Smiley's People 01 detective, Adventure Series, Action",
5287 'uploader': 'STREEM',
5288 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5289 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5290 'upload_date': '20150526',
5291 'license': 'Standard YouTube License',
5292 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5293 'categories': ['People & Blogs'],
5294 'tags': list,
5295 'view_count': int,
5296 'like_count': int,
a6213a49 5297 },
5298 'params': {
5299 'skip_download': True,
5300 },
5301 'skip': 'This video is not available.',
5302 'add_ie': [YoutubeIE.ie_key()],
5303 }, {
5304 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5305 'only_matching': True,
5306 }, {
5307 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5308 'only_matching': True,
5309 }, {
5310 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5311 'info_dict': {
12a1b225 5312 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5313 'ext': 'mp4',
976ae3ea 5314 'title': str,
a6213a49 5315 'uploader': 'Sky News',
5316 'uploader_id': 'skynews',
5317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5318 'upload_date': r're:\d{8}',
976ae3ea 5319 'description': str,
a6213a49 5320 'categories': ['News & Politics'],
5321 'tags': list,
5322 'like_count': int,
6c73052c 5323 'release_timestamp': 1642502819,
976ae3ea 5324 'channel': 'Sky News',
5325 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5326 'age_limit': 0,
5327 'view_count': int,
6c73052c 5328 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5329 'playable_in_embed': True,
6c73052c 5330 'release_date': '20220118',
976ae3ea 5331 'availability': 'public',
5332 'live_status': 'is_live',
5333 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5334 'channel_follower_count': int
a6213a49 5335 },
5336 'params': {
5337 'skip_download': True,
5338 },
976ae3ea 5339 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5340 }, {
5341 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5342 'info_dict': {
5343 'id': 'a48o2S1cPoo',
5344 'ext': 'mp4',
5345 'title': 'The Young Turks - Live Main Show',
5346 'uploader': 'The Young Turks',
5347 'uploader_id': 'TheYoungTurks',
5348 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5349 'upload_date': '20150715',
5350 'license': 'Standard YouTube License',
5351 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5352 'categories': ['News & Politics'],
5353 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5354 'like_count': int,
a6213a49 5355 },
5356 'params': {
5357 'skip_download': True,
5358 },
5359 'only_matching': True,
5360 }, {
5361 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5362 'only_matching': True,
5363 }, {
5364 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5365 'only_matching': True,
5366 }, {
5367 'note': 'A channel that is not live. Should raise error',
5368 'url': 'https://www.youtube.com/user/numberphile/live',
5369 'only_matching': True,
5370 }, {
5371 'url': 'https://www.youtube.com/feed/trending',
5372 'only_matching': True,
5373 }, {
5374 'url': 'https://www.youtube.com/feed/library',
5375 'only_matching': True,
5376 }, {
5377 'url': 'https://www.youtube.com/feed/history',
5378 'only_matching': True,
5379 }, {
5380 'url': 'https://www.youtube.com/feed/subscriptions',
5381 'only_matching': True,
5382 }, {
5383 'url': 'https://www.youtube.com/feed/watch_later',
5384 'only_matching': True,
5385 }, {
5386 'note': 'Recommended - redirects to home page.',
5387 'url': 'https://www.youtube.com/feed/recommended',
5388 'only_matching': True,
5389 }, {
5390 'note': 'inline playlist with not always working continuations',
5391 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5392 'only_matching': True,
5393 }, {
5394 'url': 'https://www.youtube.com/course',
5395 'only_matching': True,
5396 }, {
5397 'url': 'https://www.youtube.com/zsecurity',
5398 'only_matching': True,
5399 }, {
5400 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5401 'only_matching': True,
5402 }, {
5403 'url': 'https://www.youtube.com/TheYoungTurks/live',
5404 'only_matching': True,
5405 }, {
5406 'url': 'https://www.youtube.com/hashtag/cctv9',
5407 'info_dict': {
5408 'id': 'cctv9',
5409 'title': '#cctv9',
976ae3ea 5410 'tags': [],
a6213a49 5411 },
5412 'playlist_mincount': 350,
5413 }, {
5414 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5415 'only_matching': True,
5416 }, {
5417 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5418 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5419 'only_matching': True
5420 }, {
5421 'note': '/browse/ should redirect to /channel/',
5422 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5423 'only_matching': True
5424 }, {
5425 'note': 'VLPL, should redirect to playlist?list=PL...',
5426 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5427 'info_dict': {
5428 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5429 'uploader': 'NoCopyrightSounds',
5430 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5431 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5432 'title': 'NCS : All Releases 💿',
976ae3ea 5433 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5434 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5435 'modified_date': r're:\d{8}',
5436 'view_count': int,
5437 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5438 'tags': [],
5439 'channel': 'NoCopyrightSounds',
c26f9b99 5440 'availability': 'public',
a6213a49 5441 },
5442 'playlist_mincount': 166,
976ae3ea 5443 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5444 }, {
5445 'note': 'Topic, should redirect to playlist?list=UU...',
5446 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5447 'info_dict': {
5448 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5449 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5450 'title': 'Uploads from Royalty Free Music - Topic',
5451 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5452 'tags': [],
5453 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5454 'channel': 'Royalty Free Music - Topic',
5455 'view_count': int,
5456 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5457 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5458 'modified_date': r're:\d{8}',
5459 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5460 'description': '',
c26f9b99 5461 'availability': 'public',
a6213a49 5462 },
5463 'expected_warnings': [
a6213a49 5464 'The URL does not have a videos tab',
976ae3ea 5465 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5466 ],
5467 'playlist_mincount': 101,
5468 }, {
5469 'note': 'Topic without a UU playlist',
5470 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5471 'info_dict': {
5472 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5473 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5474 'tags': [],
a6213a49 5475 },
5476 'expected_warnings': [
976ae3ea 5477 'the playlist redirect gave error',
a6213a49 5478 ],
5479 'playlist_mincount': 9,
5480 }, {
5481 'note': 'Youtube music Album',
5482 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5483 'info_dict': {
5484 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5485 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5486 'tags': [],
5487 'view_count': int,
5488 'description': '',
5489 'availability': 'unlisted',
5490 'modified_date': r're:\d{8}',
a6213a49 5491 },
5492 'playlist_count': 50,
5493 }, {
5494 'note': 'unlisted single video playlist',
5495 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5496 'info_dict': {
5497 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5498 'uploader': 'colethedj',
5499 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5500 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5501 'availability': 'unlisted',
5502 'tags': [],
12a1b225 5503 'modified_date': '20220418',
976ae3ea 5504 'channel': 'colethedj',
5505 'view_count': int,
5506 'description': '',
5507 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5508 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5509 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5510 },
5511 'playlist_count': 1,
5512 }, {
5513 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5514 'url': 'https://www.youtube.com/feed/recommended',
5515 'info_dict': {
5516 'id': 'recommended',
5517 'title': 'recommended',
6c73052c 5518 'tags': [],
a6213a49 5519 },
5520 'playlist_mincount': 50,
5521 'params': {
5522 'skip_download': True,
5523 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5524 },
5525 }, {
5526 'note': 'API Fallback: /videos tab, sorted by oldest first',
5527 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5528 'info_dict': {
5529 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5530 'title': 'Cody\'sLab - Videos',
5531 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5532 'uploader': 'Cody\'sLab',
5533 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5534 'channel': 'Cody\'sLab',
5535 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5536 'tags': [],
5537 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5538 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5539 'channel_follower_count': int
a6213a49 5540 },
5541 'playlist_mincount': 650,
5542 'params': {
5543 'skip_download': True,
5544 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5545 },
5546 }, {
5547 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5548 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5549 'info_dict': {
5550 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5551 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5552 'title': 'Uploads from Royalty Free Music - Topic',
5553 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5554 'modified_date': r're:\d{8}',
5555 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5556 'description': '',
5557 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5558 'tags': [],
5559 'channel': 'Royalty Free Music - Topic',
5560 'view_count': int,
5561 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
c26f9b99 5562 'availability': 'public',
a6213a49 5563 },
5564 'expected_warnings': [
976ae3ea 5565 'does not have a videos tab',
5566 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5567 ],
5568 'playlist_mincount': 101,
5569 'params': {
5570 'skip_download': True,
5571 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5572 },
7c219ea6 5573 }, {
5574 'note': 'non-standard redirect to regional channel',
5575 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5576 'only_matching': True
61d3665d 5577 }, {
5578 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5579 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5580 'info_dict': {
5581 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5582 'modified_date': '20220407',
5583 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5584 'tags': [],
5585 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5586 'uploader': 'pukkandan',
5587 'availability': 'unlisted',
5588 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5589 'channel': 'pukkandan',
5590 'description': 'Test for collaborative playlist',
5591 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5592 'view_count': int,
61d3665d 5593 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5594 },
5595 'playlist_mincount': 2
c26f9b99 5596 }, {
5597 'note': 'translated tab name',
5598 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5599 'info_dict': {
5600 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5601 'tags': [],
5602 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5603 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5604 'description': '',
5605 'title': 'cole-dlp-test-acc - 再生リスト',
5606 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5607 'uploader': 'cole-dlp-test-acc',
5608 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5609 'channel': 'cole-dlp-test-acc',
5610 },
5611 'playlist_mincount': 1,
5612 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5613 'expected_warnings': ['Preferring "ja"'],
5614 }, {
5615 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5616 'note': 'preferred lang set with playlist with translated video titles',
5617 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5618 'info_dict': {
5619 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5620 'tags': [],
5621 'view_count': int,
5622 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5623 'uploader': 'cole-dlp-test-acc',
5624 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5625 'channel': 'cole-dlp-test-acc',
5626 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5627 'description': 'test',
5628 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5629 'title': 'dlp test playlist',
5630 'availability': 'public',
5631 },
5632 'playlist_mincount': 1,
5633 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5634 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 5635 }, {
5636 # shorts audio pivot for 2GtVksBMYFM.
5637 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5638 'info_dict': {
5639 'id': 'sfv_audio_pivot',
5640 'title': 'sfv_audio_pivot',
5641 'tags': [],
5642 },
5643 'playlist_mincount': 50,
5644
a6213a49 5645 }]
5646
@classmethod
def suitable(cls, url):
    """Return False for any URL that YoutubeIE accepts; otherwise defer to the parent check."""
    # YoutubeIE gets first refusal on every URL
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5650
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL itself matches
#   tab  - an optional "/<word>" segment, only attempted when the
#          ``not_channel`` group did not take part in the match
#   post - everything that follows
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5652
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab, playlist or feed URL and hand it to the matching extraction path.

    Steps, in order:
      * force the host to www.youtube.com and split the URL with ``_URL_RE``;
      * rewrite YouTube Music ``VL``/``MP``/``browse`` URLs;
      * default a bare channel URL to its ``/videos`` tab (unless the
        ``no-youtube-channel-redirect`` compat option is set);
      * handle ``watch`` URLs that carry a playlist id and/or a video id;
      * download the page data, follow a regional redirect if one is
        offered, and reconcile the requested tab with the selected one;
      * return the result built from tabs, from a watch-page playlist,
        or a plain video URL result.

    Raises ExtractorError when the page cannot be recognised, when an album
    cannot be resolved, or when an explicitly requested tab does not exist;
    raises UserNotLive when a ``/live`` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    allow_channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Groups that did not take part in the match come back as None;
        # turn them into '' so they can be joined and sliced freely
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    url_parts = split_url(url)
    redirect_warning = None
    pre, post = url_parts['pre'], url_parts['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = url_parts['tab'].lower()
    is_channel = not url_parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif url_parts['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the URL asked for before any default tab is filled in
    original_tab_name = tab
    if is_channel and not tab and allow_channel_redirect:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    url_parts = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and url_parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        url_parts = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and allow_channel_redirect:
        # Re-attach the tab and trailing part of the current URL to the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), url_parts['tab'], url_parts['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = None
        if self._preferred_lang in (None, 'en'):
            selected_tab_name = translated_tab_name
        if not selected_tab_name and selected_tab_url:
            selected_tab_name = split_url(selected_tab_url)['tab'][1:]  # primary
        if not selected_tab_name:
            selected_tab_name = translated_tab_name

        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = url_parts['tab'][1:]

        if allow_channel_redirect:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=url_parts['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was spelled out in the URL, so do not silently substitute another
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # ``data`` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if url_parts['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5791
c5e8d7af 5792
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and ``...?list=<id>`` URLs and forwards them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional "<scheme>://<sub>.<host>/...?...list=" prefix
    # followed by the playlist id itself, so a bare id matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a non-empty ``v`` query value."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported inside the method rather than relying
        # on a module-level name; presumably deliberate (e.g. so the method still
        # works if its source is reused outside this module) - confirm before hoisting
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Build the canonical playlist URL and return a url_result that targets YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the incoming query if there is one; a bare id has none, so synthesise ``list=``
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            # Carry the "came from YouTube Music" flag along with the rewritten URL
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5904
5905
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is keyed by the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5955
5956
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=...`` URLs onto the channel's ``/live`` page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5970
5971
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's videos page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5986
b05654f0 5987
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6005
6006
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu (``:ytnotif`` keyword) as a playlist of URL entries."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in *response*.

        ``continuation_list`` is a one-element list used as an out-parameter:
        its slot is reset to None and then set to the last
        ``continuationItemRenderer`` seen, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            # Keep the previously stored continuation when this item has none
            continuation_list[0] = menu_item.get('continuationItemRenderer') or continuation_list[0]

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result for a single notification renderer.

        Video notifications point at the watch page. Otherwise the
        notification is treated as a community post. Returns None when
        neither a video ID nor a channel/post ID pair can be found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is derived from the "sent" time text, so it is only
        # extracted when the approximate_date argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return all notifications as a playlist whose id and title are 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6096
6097
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` search key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 6111
a61fd4cf 6112
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` search key (newest first)."""
    # Name is derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 6126
c9ae7b95 6127
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com ``/results`` and ``/search`` URLs."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL; the query doubles as playlist id and title."""
        url_params = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        search_params = url_params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 6167
6168
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one result section."""
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search YouTube Music for the query in *url*.

        An explicit ``sp`` parameter wins over the ``#section`` fragment.
        The playlist id/title is the query, suffixed with `` - <section>``
        when a section applies.
        """
        url_params = parse_qs(url)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        params = url_params.get('sp', (None,))[0]
        if params:
            # Show the section's name if the params are known, else the raw params
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 6220
6221
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override _FEED_NAME; it determines both
    the extractor name and the /feed/ URL that is handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # Reuses the login check defined on YoutubeBaseInfoExtractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6240
6241
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6254
6255
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed; also matches the bare youtube.com URL."""
    _FEED_NAME = 'recommended'
    # Unlike the feed base class default, this feed does not require login
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 6271
1ed5b5c9 6272
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 6284
1ed5b5c9 6285
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keyword)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
6294
6295
class YoutubeStoriesIE(InfoExtractor):
    """Resolve ``ytstories:<channel id>`` to the channel's ``RLTD...`` playlist."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, marking the playlist URL as a story via smuggled data."""
        # The matched id is the channel ID with its leading "UC" stripped by the regex
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6310
6311
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs onto the ``sfv_audio_pivot`` feed."""
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id.

        The video id is embedded three times in a fixed binary template,
        which is then base64-encoded and URL-quoted.
        """
        encoded_id = video_id.encode()
        # NOTE(review): the template is presumably a serialized protobuf message - confirm
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6334
6335
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that were cut off, typically by an unquoted ``&``.

    The pattern only matches URLs that end right after ``watch?`` plus at
    most one non-video parameter (or after ``attribution_link?a=...``), so
    there is nothing to extract; ``_real_extract`` always raises.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command names this project's executable (yt-dlp),
        # so the user can copy it verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6383
6384
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/clip/<id>`` URLs by resolving them to a section of the base video."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        The result keeps the clip ID as ``id`` and carries the clip's start
        and end offsets in seconds.

        Raises ExtractorError when the page data has no video ID or no clip
        timing data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a failed lookup surfaces as an opaque TypeError
        # from subscripting None below.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Offsets are given in milliseconds; the result uses seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6441
6442
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is 1-10 characters long and end the URL."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)