]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/dplay] Add MotorTrendOnDemand extractor (#5151)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
c26f9b99 5import enum
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
720c3099 9import math
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
46383212 13import sys
f8271158 14import threading
8a784c74 15import time
e0df6211 16import traceback
14f25df2 17import urllib.error
ac668111 18import urllib.parse
c5e8d7af 19
b05654f0 20from .common import InfoExtractor, SearchInfoExtractor
25836db6 21from .openload import PhantomJSwrapper
14f25df2 22from ..compat import functools
545cc85d 23from ..jsinterp import JSInterpreter
4bb4a188 24from ..utils import (
f8271158 25 NO_DEFAULT,
26 ExtractorError,
4d37720a 27 LazyList,
693f0600 28 UserNotLive,
720c3099 29 bug_reports_message,
82d02080 30 classproperty,
c5e8d7af 31 clean_html,
d92f5d5a 32 datetime_from_str,
11f9be09 33 dict_get,
7a32c70d 34 filter_dict,
2d30521a 35 float_or_none,
11f9be09 36 format_field,
ff91cf74 37 get_first,
dd27fd17 38 int_or_none,
641ad5d8 39 is_html,
34921b43 40 join_nonempty,
48416bc4 41 js_to_json,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
4dfbf869 49 parse_qs,
dca3ff4a 50 qualities,
3995d37d 51 remove_start,
cf7e015f 52 smuggle_url,
dbdaaa23 53 str_or_none,
c93d53f5 54 str_to_int,
f3aa3c3f 55 strftime_or_none,
7c365c21 56 traverse_obj,
556dbe7f 57 try_get,
c5e8d7af
PH
58 unescapeHTML,
59 unified_strdate,
f0d785d3 60 unified_timestamp,
cf7e015f 61 unsmuggle_url,
8bdd16b4 62 update_url_query,
21c340b8 63 url_or_none,
fe93e2c4 64 urljoin,
7c365c21 65 variadic,
c5e8d7af
PH
66)
67
962ffcf8 68# any clients starting with _ cannot be explicitly requested by the user
000c15a4 69INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
a0c830f4 75 'clientVersion': '2.20220801.00.00',
000c15a4 76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 85 'clientVersion': '1.20220731.00.00',
000c15a4 86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
a0c830f4 96 'clientVersion': '1.20220727.01.00',
000c15a4 97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
e7e94f2a 101 'web_creator': {
18c7683d 102 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
a0c830f4 106 'clientVersion': '1.20220726.00.00',
e7e94f2a
D
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
000c15a4 111 'android': {
18c7683d 112 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
50ac0e54 116 'clientVersion': '17.31.35',
117 'androidSdkVersion': 30,
118 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 119 }
120 },
121 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 122 'REQUIRE_JS_PLAYER': False
000c15a4 123 },
124 'android_embedded': {
18c7683d 125 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 126 'INNERTUBE_CONTEXT': {
127 'client': {
128 'clientName': 'ANDROID_EMBEDDED_PLAYER',
50ac0e54 129 'clientVersion': '17.31.35',
130 'androidSdkVersion': 30,
131 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 132 },
133 },
b6de707d 134 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
135 'REQUIRE_JS_PLAYER': False
000c15a4 136 },
137 'android_music': {
18c7683d 138 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 139 'INNERTUBE_CONTEXT': {
140 'client': {
141 'clientName': 'ANDROID_MUSIC',
a0c830f4 142 'clientVersion': '5.16.51',
50ac0e54 143 'androidSdkVersion': 30,
144 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
000c15a4 145 }
146 },
147 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 148 'REQUIRE_JS_PLAYER': False
000c15a4 149 },
e7e94f2a 150 'android_creator': {
18c7683d 151 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
152 'INNERTUBE_CONTEXT': {
153 'client': {
154 'clientName': 'ANDROID_CREATOR',
50ac0e54 155 'clientVersion': '22.30.100',
156 'androidSdkVersion': 30,
157 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
e7e94f2a
D
158 },
159 },
b6de707d 160 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
161 'REQUIRE_JS_PLAYER': False
e7e94f2a 162 },
18c7683d 163 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
164 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 165 'ios': {
18c7683d 166 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 167 'INNERTUBE_CONTEXT': {
168 'client': {
169 'clientName': 'IOS',
224b5a35 170 'clientVersion': '17.33.2',
18c7683d 171 'deviceModel': 'iPhone14,3',
224b5a35 172 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 173 }
174 },
b6de707d 175 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
176 'REQUIRE_JS_PLAYER': False
000c15a4 177 },
178 'ios_embedded': {
000c15a4 179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
224b5a35 182 'clientVersion': '17.33.2',
18c7683d 183 'deviceModel': 'iPhone14,3',
224b5a35 184 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 185 },
186 },
b6de707d 187 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
188 'REQUIRE_JS_PLAYER': False
000c15a4 189 },
190 'ios_music': {
18c7683d 191 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_MUSIC',
224b5a35
SF
195 'clientVersion': '5.21',
196 'deviceModel': 'iPhone14,3',
197 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 198 },
199 },
b6de707d 200 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
201 'REQUIRE_JS_PLAYER': False
000c15a4 202 },
e7e94f2a
D
203 'ios_creator': {
204 'INNERTUBE_CONTEXT': {
205 'client': {
206 'clientName': 'IOS_CREATOR',
224b5a35
SF
207 'clientVersion': '22.33.101',
208 'deviceModel': 'iPhone14,3',
209 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
e7e94f2a
D
210 },
211 },
b6de707d 212 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
213 'REQUIRE_JS_PLAYER': False
e7e94f2a 214 },
3619f78d 215 # mweb has 'ultralow' formats
216 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 217 'mweb': {
18c7683d 218 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 219 'INNERTUBE_CONTEXT': {
220 'client': {
221 'clientName': 'MWEB',
a0c830f4 222 'clientVersion': '2.20220801.00.00',
000c15a4 223 }
224 },
225 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
e7870111
D
226 },
227 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
228 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
229 'tv_embedded': {
230 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
231 'INNERTUBE_CONTEXT': {
232 'client': {
233 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
234 'clientVersion': '2.0',
235 },
236 },
237 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
238 },
000c15a4 239}
240
241
e7870111
D
242def _split_innertube_client(client_name):
243 variant, *base = client_name.rsplit('.', 1)
244 if base:
245 return variant, base[0], variant
246 base, *variant = client_name.split('_', 1)
247 return client_name, base, variant[0] if variant else None
248
249
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    Each entry gets default values for its API key, host, JS-player
    requirement and 'hl', plus a numeric 'priority' derived from the position
    of its base client in the base-client tuple. Entries without a variant
    additionally spawn a '<name>_embedscreen' copy.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(tuple(reversed(base_clients)))

    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: *_embedscreen entries are inserted during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain client: derive an extra entry that uses the EMBED client screen
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
276
277
278build_innertube_clients()
279
280
class BadgeType(enum.Enum):
    """Badge kinds: five availability states plus a live-now marker."""

    # Explicit values match what enum.auto() assigns (1, 2, 3, ...)
    AVAILABILITY_UNLISTED = 1
    AVAILABILITY_PRIVATE = 2
    AVAILABILITY_PUBLIC = 3
    AVAILABILITY_PREMIUM = 4
    AVAILABILITY_SUBSCRIPTION = 5
    LIVE_NOW = 6
288
289
de7f3446 290class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 291 """Provide base functions for Youtube extractors"""
e00eb564 292
3462ffa8 293 _RESERVED_NAMES = (
3cd786db 294 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 295 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 296 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 297 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 298
3619f78d 299 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
300
52efa4b3 301 # _NETRC_MACHINE = 'youtube'
3619f78d 302
b2e8bc1b
JMF
303 # If True it will raise an error if no login info is provided
304 _LOGIN_REQUIRED = False
305
d9190e44
RH
306 _INVIDIOUS_SITES = (
307 # invidious-redirect websites
308 r'(?:www\.)?redirect\.invidious\.io',
309 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 310 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
311 r'(?:www\.)?invidious\.pussthecat\.org',
312 r'(?:www\.)?invidious\.zee\.li',
313 r'(?:www\.)?invidious\.ethibox\.fr',
314 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
315 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
316 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
317 # youtube-dl invidious instances list
318 r'(?:(?:www|no)\.)?invidiou\.sh',
319 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
320 r'(?:www\.)?invidious\.kabi\.tk',
321 r'(?:www\.)?invidious\.mastodon\.host',
322 r'(?:www\.)?invidious\.zapashcanon\.fr',
323 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
324 r'(?:www\.)?invidious\.tinfoil-hat\.net',
325 r'(?:www\.)?invidious\.himiko\.cloud',
326 r'(?:www\.)?invidious\.reallyancient\.tech',
327 r'(?:www\.)?invidious\.tube',
328 r'(?:www\.)?invidiou\.site',
329 r'(?:www\.)?invidious\.site',
330 r'(?:www\.)?invidious\.xyz',
331 r'(?:www\.)?invidious\.nixnet\.xyz',
332 r'(?:www\.)?invidious\.048596\.xyz',
333 r'(?:www\.)?invidious\.drycat\.fr',
334 r'(?:www\.)?inv\.skyn3t\.in',
335 r'(?:www\.)?tube\.poal\.co',
336 r'(?:www\.)?tube\.connect\.cafe',
337 r'(?:www\.)?vid\.wxzm\.sx',
338 r'(?:www\.)?vid\.mint\.lgbt',
339 r'(?:www\.)?vid\.puffyan\.us',
340 r'(?:www\.)?yewtu\.be',
341 r'(?:www\.)?yt\.elukerio\.org',
342 r'(?:www\.)?yt\.lelux\.fi',
343 r'(?:www\.)?invidious\.ggc-project\.de',
344 r'(?:www\.)?yt\.maisputain\.ovh',
345 r'(?:www\.)?ytprivate\.com',
346 r'(?:www\.)?invidious\.13ad\.de',
347 r'(?:www\.)?invidious\.toot\.koeln',
348 r'(?:www\.)?invidious\.fdn\.fr',
349 r'(?:www\.)?watch\.nettohikari\.com',
350 r'(?:www\.)?invidious\.namazso\.eu',
351 r'(?:www\.)?invidious\.silkky\.cloud',
352 r'(?:www\.)?invidious\.exonip\.de',
353 r'(?:www\.)?invidious\.riverside\.rocks',
354 r'(?:www\.)?invidious\.blamefran\.net',
355 r'(?:www\.)?invidious\.moomoo\.de',
356 r'(?:www\.)?ytb\.trom\.tf',
357 r'(?:www\.)?yt\.cyberhost\.uk',
358 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
359 r'(?:www\.)?qklhadlycap4cnod\.onion',
360 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
361 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
362 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
363 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
364 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
365 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
366 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
367 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
368 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
369 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
370 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
371 r'(?:www\.)?piped\.kavin\.rocks',
372 r'(?:www\.)?piped\.silkky\.cloud',
373 r'(?:www\.)?piped\.tokhmi\.xyz',
374 r'(?:www\.)?piped\.moomoo\.me',
375 r'(?:www\.)?il\.ax',
376 r'(?:www\.)?piped\.syncpundit\.com',
377 r'(?:www\.)?piped\.mha\.fi',
378 r'(?:www\.)?piped\.mint\.lgbt',
379 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
380 )
381
c26f9b99 382 # extracted from account/account_menu ep
383 # XXX: These are the supported YouTube UI and API languages,
384 # which is slightly different from languages supported for translation in YouTube studio
385 _SUPPORTED_LANG_CODES = [
386 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
387 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
388 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
389 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
390 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
391 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
392 ]
393
a057779d 394 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
395
@functools.cached_property
def _preferred_lang(self):
    """
    Language code read from the 'lang' configuration argument.

    Evaluates to None when the argument is empty. Raises an expected
    ExtractorError when the code is not listed in _SUPPORTED_LANG_CODES
    (the comparison is case-sensitive) and emits a warning for any
    language other than 'en'.
    """
    requested = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not requested:
        return None

    if requested not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if requested != 'en':
        self.report_warning(
            f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return requested
413
def _initialize_consent(self):
    """Set an accepting CONSENT cookie on .youtube.com when one is needed.

    Nothing is done when a __Secure-3PSID cookie exists or when the current
    CONSENT cookie already contains 'YES'. The id from a 'PENDING+<digits>'
    CONSENT value is reused; otherwise a random three-digit id is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{consent_id}')
8d81f3e3 428
def _initialize_pref(self):
    """Rewrite the PREF cookie on .youtube.com with forced language and timezone.

    Fields of an existing PREF cookie are kept; 'hl' is set to the preferred
    language (or 'en') and 'tz' to 'UTC'. A PREF value that cannot be parsed
    is reported with a warning and discarded.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 440
def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, CONSENT cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
445
446 def _check_login_required(self):
24146491 447 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 448 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 449
b7c47b74 450 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
451 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 452
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for *client*."""
    client_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_config)
109dd3b2 455
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for *client*."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 458
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on *ytcfg*, falling back to the default config of *default_client*.

    The fallback is used whenever the value found in *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
463
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from *ytcfg*, or from the default config.

    'INNERTUBE_CLIENT_NAME' is tried first, then the 'clientName' field of
    the innertube context.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 468
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from *ytcfg*, or from the default config.

    'INNERTUBE_CLIENT_VERSION' is tried first, then the 'clientVersion'
    field of the innertube context.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 473
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Precedence: the 'innertube_host' configuration argument, then
    *req_api_hostname*, then the host of *default_client* ('web' when unset).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
477
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from *ytcfg*, or from the default config of *default_client*."""
    def read_api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, str, default_client)
109dd3b2 480
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the innertube context dict with language and timezone enforced.

    The context is taken from *ytcfg* when it provides one, otherwise from
    the default config of *default_client*. Its 'client' dict is modified in
    place; when there is no 'client' dict the overrides go to a throwaway
    dict and the context is returned unchanged.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client['hl'] = self._preferred_lang or 'en'
    client['timeZone'] = 'UTC'
    client['utcOffsetMinutes'] = 0
    return context
488
cf87314d 489 _SAPISID = None
490
109dd3b2 491 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 492 time_now = round(time.time())
cf87314d 493 if self._SAPISID is None:
494 yt_cookies = self._get_cookies('https://www.youtube.com')
495 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
496 # See: https://github.com/yt-dlp/yt-dlp/issues/393
497 sapisid_cookie = dict_get(
498 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
499 if sapisid_cookie and sapisid_cookie.value:
500 self._SAPISID = sapisid_cookie.value
501 self.write_debug('Extracted SAPISID cookie')
502 # SAPISID cookie is required if not already present
503 if not yt_cookies.get('SAPISID'):
504 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
505 self._set_cookie(
506 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
507 else:
508 self._SAPISID = False
509 if not self._SAPISID:
510 return None
1974e99f 511 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
512 sapisidhash = hashlib.sha1(
86e5f3ed 513 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 514 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
515
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST *query* as JSON to the /youtubei/v1/<ep> endpoint and return the parsed response.

    The request body is *query* merged over a 'context' entry (*context*, or
    the context extracted for *default_client*). Generated API headers are
    extended with a JSON content type and then with *headers*. The API key is
    the 'innertube_key' configuration argument, else *api_key*, else the key
    extracted for *default_client*.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 533
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON that follows the _YT_INITIAL_DATA_RE assignment in *webpage*, via _search_json."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 536
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among *data* that converts to an int,
    or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
547
548 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from *ytcfg* ('ID_TOKEN'), else search *webpage* for it.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
558
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Each argument is checked in turn; the first id found is returned.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        if len(parts) >= 2 and parts[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return parts[0]
a1c5d2ca 577
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
ac56cf38 587
@functools.cached_property
def is_authenticated(self):
    """True when _generate_sapisidhash_header yields an authorization value."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
591
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call in *webpage*.

    Returns an empty dict when *webpage* is empty, when no such call is
    found, or when the JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
599
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from *ytcfg*.
    'X-Goog-AuthUser' is added when a session index is known, or as 0 when
    only *account_syncid* is given. 'Authorization' and 'X-Origin' are added
    when a SAPISIDHASH value can be generated. The result is passed through
    filter_dict before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    headers = {
        'X-YouTube-Client-Name': str(self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 625
def _download_ytcfg(self, client, video_id):
    """Download the page of a web client and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page; any other client
    yields an empty dict, as does a failed download.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
637
2d6659b9 638 @staticmethod
639 def _build_api_continuation_query(continuation, ctp=None):
640 query = {
641 'continuation': continuation
642 }
643 # TODO: Inconsistency with clickTrackingParams.
644 # Currently we have a fixed ctp contained within context (from ytcfg)
645 # and a ctp in root query for continuation.
646 if ctp:
647 query['clickTracking'] = {'clickTrackingParams': ctp}
648 return query
649
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of *renderer*.

    Returns None when the renderer carries no such data or no token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 662
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or has no string
    token under continuationCommand.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 672
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*.

    Next/reload continuation data is preferred; otherwise the first usable
    endpoint of a continuationItemRenderer under 'contents', 'items' or
    'rows' is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 683
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs for the alerts found in *data*.

    Reads the 'alerts' list of *data*. Entries and renderers that are not
    dicts are skipped, as are alerts without a type or without message text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner level too: a non-dict value has no .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
696
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report *alerts* as warnings and raise for the last error alert.

    Alerts of type 'error' (case-insensitive) count as errors only when
    *fatal* is set. Other alerts are warnings unless their message is listed
    in _IGNORED_WARNINGS. All errors but the last are reported as warnings;
    the last one is raised as an ExtractorError.
    """
    errors, warnings = [], []
    for kind, message in alerts:
        is_fatal_error = kind.lower() == 'error' and fatal
        if is_fatal_error:
            errors.append((kind, message))
        elif message not in self._IGNORED_WARNINGS:
            warnings.append((kind, message))

    for kind, message in [*warnings, *errors[:-1]]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
709
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of *data* and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
712
def _extract_badges(self, renderer: dict):
    """Return the badges of *renderer* as a list of ``{'type': BadgeType}`` dicts.

    Each metadataBadgeRenderer under 'badges' is classified by its icon type,
    then by its style, and finally by matching its label text. A badge that
    matches none of these is omitted, and every badge contributes at most one
    entry.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label (badge_type is falsy here)
                # and stop at the first match so the badge is added only once
                badges.append({'type': label_badge_type})
                break

    return badges
752
@staticmethod
def _has_badge(badges, badge_type):
    """Return True when any entry of *badges* has 'type' equal to *badge_type*."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))
756
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in *data* at any of *path_list*.

    With no paths, *data* itself is inspected. For each candidate object
    'simpleText' is preferred; otherwise the 'text' fields of its 'runs'
    (or of the object itself when it is a list) are concatenated, limited to
    the first *max_runs* runs when that is set. Returns None when nothing
    yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without ... or list/tuple keys gives one object, not a list of results
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 778
def _get_count(self, data, *path_list):
    """
    Extract a count from the text found at @path_list in @data
    @returns the result of parse_count() on the text; if that is None,
             the leading run of digits/commas (whitespace removed) is
             converted with str_to_int() instead
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    # Fallback: strip all whitespace and read the leading number
    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(
        r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
786
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail entries from one or more 'thumbnails' lists
    @param path_list: paths to the level that contains the 'thumbnails' key;
                      with no paths, @data itself is that level
    @returns list of dicts with 'url', 'height' and 'width'
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(
            data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            link = url_or_none(entry.get('url'))
            if link:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                # so the query string is dropped from maxresdefault URLs
                if 'maxresdefault' in link:
                    link = link.partition('?')[0]
                collected.append({
                    'url': link,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected
810
f3aa3c3f 811 @staticmethod
812 def extract_relative_time(relative_time_text):
813 """
814 Extracts a relative time from string and converts to dt object
f0d785d3 815 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
f3aa3c3f 816 """
f0d785d3 817 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
f3aa3c3f 818 if mobj:
f0d785d3 819 start = mobj.group('start')
820 if start:
821 return datetime_from_str(start)
f3aa3c3f 822 try:
f0d785d3 823 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 824 except ValueError:
825 return None
826
c26f9b99 827 def _parse_time_text(self, text):
828 if not text:
829 return
f3aa3c3f 830 dt = self.extract_relative_time(text)
831 timestamp = None
832 if isinstance(dt, datetime.datetime):
833 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 834
835 if timestamp is None:
836 timestamp = (
837 unified_timestamp(text) or unified_timestamp(
838 self._search_regex(
17322130 839 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 840 text.lower(), 'time text', default=None)))
f0d785d3 841
c26f9b99 842 if text and timestamp is None and self._preferred_lang in (None, 'en'):
843 self.report_warning(
844 f'Cannot parse localized time text "{text}"', only_once=True)
845 return timestamp
f3aa3c3f 846
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint @ep, retrying through self.RetryManager()
    @param check_get_keys: key path(s) looked up in the response; a falsy
                           lookup result is treated as incomplete data and retried
    @param fatal: forwarded to self._error_or_warning() for errors that are not retried
    @returns the API response, or the result of self._error_or_warning()
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                # Network error without an HTTP response: retry
                retry.error = err
                continue

            # Peek at the error body; a non-HTML body may be JSON with a message
            head = cause.read(512)
            if not is_html(head):
                body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                error_json = self._parse_json(body, item_id, fatal=False)
                api_message = try_get(error_json, lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)
109dd3b2 898
9297939e 899 @staticmethod
900 def is_music_url(url):
901 return re.match(r'https?://music\.youtube\.com/', url) is not None
902
def _extract_video(self, renderer):
    """
    Build a 'url' result pointing at YoutubeIE from a video renderer
    @param renderer: dict for one video entry; must support .get()
    @returns info dict with id, url, title, description, duration,
             view_count, uploader, channel_id, thumbnails, upload_date,
             live_status, release_timestamp and availability
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    endpoint_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', endpoint_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    def has_badge(kind):
        return self._has_badge(badges, kind)

    # The date is only derived from the time text when 'approximate_date' is set
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or has_badge(BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    if has_badge(BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=has_badge(BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=has_badge(BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=has_badge(BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=has_badge(BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
963
0c148415 964
360e1ca5 965class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 966 IE_DESC = 'YouTube'
cb7dfeea 967 _VALID_URL = r"""(?x)^
c5e8d7af 968 (
edb53e2d 969 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 970 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
971 (?:www\.)?deturl\.com/www\.youtube\.com|
972 (?:www\.)?pwnyoutube\.com|
973 (?:www\.)?hooktube\.com|
974 (?:www\.)?yourepeat\.com|
975 tube\.majestyc\.net|
976 %(invidious)s|
977 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
978 (?:.*?\#/)? # handle anchor (#/) redirect urls
979 (?: # the various things that can precede the ID:
b6ce9bb0 980 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 981 |(?: # or the v= param in all its forms
f7000f3a 982 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 983 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 984 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
985 v=
986 )
f4b05232 987 ))
cbaed4bb
S
988 |(?:
989 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
990 vid\.plus| # or vid.plus/xxxx
991 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 992 %(invidious)s
cbaed4bb 993 )/
edb53e2d 994 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 995 )
c5e8d7af 996 )? # all until now is optional -> you can pass the naked ID
201c1459 997 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 998 (?(1).+)? # if we found the ID, everything can follow
9297939e 999 (?:\#|$)""" % {
d9190e44 1000 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1001 }
7c6eb424 1002 _EMBED_REGEX = [
1003 r'''(?x)
1004 (?:
0ca0f881 1005 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1006 data-video-url=|
1007 <embed[^>]+?src=|
1008 embedSWF\(?:\s*|
1009 <object[^>]+data=|
1010 new\s+SWFObject\(
1011 )
1012 (["\'])
1013 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1014 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1015 \1''',
1016 # https://wordpress.org/plugins/lazy-load-for-videos/
1017 r'''(?xs)
1018 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1019 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1020 ]
1021
e40c758c 1022 _PLAYER_INFO_RE = (
cc2db878 1023 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1024 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1025 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1026 )
2c62dc26 1027 _formats = {
c2d3cb4c 1028 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1029 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1030 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1031 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1032 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1033 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1034 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1035 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1036 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1037 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1038 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1039 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1040 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1041 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1042 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1043 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1044 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1045 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1046
1047
1048 # 3D videos
c2d3cb4c 1049 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1050 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1051 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1052 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1053 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1054 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1055 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1056
96fb5605 1057 # Apple HTTP Live Streaming
11f12195 1058 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1059 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1060 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1061 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1062 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1063 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1064 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1065 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1066
1067 # DASH mp4 video
d23028a8
S
1068 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1069 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1070 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1071 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1072 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1073 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1074 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1075 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1076 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1077 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1078 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1079 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1080
f6f1fc92 1081 # Dash mp4 audio
d23028a8
S
1082 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1083 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1084 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1085 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1086 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1087 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1088 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1089
1090 # Dash webm
d23028a8
S
1091 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1092 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1093 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1094 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1095 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1096 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1097 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1098 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1100 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1101 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1102 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1103 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1104 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1105 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1106 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1107 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1108 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1109 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1110 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1111 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1112 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1113
1114 # Dash webm audio
d23028a8
S
1115 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1116 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1117
0857baad 1118 # Dash webm audio with opus inside
d23028a8
S
1119 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1120 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1121 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1122
ce6b9a2d
PH
1123 # RTMP (unnamed)
1124 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1125
1126 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1127 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1128 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1129 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1130 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1131 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1132 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1133 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1134 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1135 }
29f7c58a 1136 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1137
fd5c4aab
S
1138 _GEO_BYPASS = False
1139
78caa52a 1140 IE_NAME = 'youtube'
2eb88d95
PH
1141 _TESTS = [
1142 {
2d3d2997 1143 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1144 'info_dict': {
1145 'id': 'BaW_jenozKc',
1146 'ext': 'mp4',
3867038a 1147 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1148 'uploader': 'Philipp Hagemeister',
1149 'uploader_id': 'phihag',
ec85ded8 1150 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1151 'channel': 'Philipp Hagemeister',
dd4c4492
S
1152 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1153 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1154 'upload_date': '20121002',
ff9f925b 1155 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1156 'categories': ['Science & Technology'],
3867038a 1157 'tags': ['youtube-dl'],
556dbe7f 1158 'duration': 10,
dbdaaa23 1159 'view_count': int,
3e7c1224 1160 'like_count': int,
ff9f925b 1161 'availability': 'public',
1162 'playable_in_embed': True,
1163 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1164 'live_status': 'not_live',
1165 'age_limit': 0,
7c80519c 1166 'start_time': 1,
297a564b 1167 'end_time': 9,
12a1b225 1168 'comment_count': int,
6c73052c 1169 'channel_follower_count': int
2eb88d95 1170 }
0e853ca4 1171 },
fccd3771 1172 {
4bc3a23e
PH
1173 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1174 'note': 'Embed-only video (#1746)',
1175 'info_dict': {
1176 'id': 'yZIXLfi8CZQ',
1177 'ext': 'mp4',
1178 'upload_date': '20120608',
1179 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1180 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1181 'uploader': 'SET India',
94bfcd23 1182 'uploader_id': 'setindia',
ec85ded8 1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1184 'age_limit': 18,
545cc85d 1185 },
1186 'skip': 'Private video',
fccd3771 1187 },
11b56058 1188 {
8bdd16b4 1189 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1190 'note': 'Use the first video ID in the URL',
1191 'info_dict': {
1192 'id': 'BaW_jenozKc',
1193 'ext': 'mp4',
3867038a 1194 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1195 'uploader': 'Philipp Hagemeister',
1196 'uploader_id': 'phihag',
ec85ded8 1197 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1198 'channel': 'Philipp Hagemeister',
1199 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1200 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1201 'upload_date': '20121002',
976ae3ea 1202 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1203 'categories': ['Science & Technology'],
3867038a 1204 'tags': ['youtube-dl'],
556dbe7f 1205 'duration': 10,
dbdaaa23 1206 'view_count': int,
11b56058 1207 'like_count': int,
976ae3ea 1208 'availability': 'public',
1209 'playable_in_embed': True,
1210 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1211 'live_status': 'not_live',
1212 'age_limit': 0,
12a1b225 1213 'comment_count': int,
6c73052c 1214 'channel_follower_count': int
34a7de29
S
1215 },
1216 'params': {
1217 'skip_download': True,
1218 },
11b56058 1219 },
dd27fd17 1220 {
2d3d2997 1221 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1222 'note': '256k DASH audio (format 141) via DASH manifest',
1223 'info_dict': {
1224 'id': 'a9LDPn-MO4I',
1225 'ext': 'm4a',
1226 'upload_date': '20121002',
1227 'uploader_id': '8KVIDEO',
ec85ded8 1228 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1229 'description': '',
1230 'uploader': '8KVIDEO',
1231 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1232 },
4bc3a23e
PH
1233 'params': {
1234 'youtube_include_dash_manifest': True,
1235 'format': '141',
4919603f 1236 },
de3c7fe0 1237 'skip': 'format 141 not served anymore',
dd27fd17 1238 },
8bdd16b4 1239 # DASH manifest with encrypted signature
1240 {
1241 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1242 'info_dict': {
1243 'id': 'IB3lcPjvWLA',
1244 'ext': 'm4a',
1245 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1246 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1247 'duration': 244,
1248 'uploader': 'AfrojackVEVO',
1249 'uploader_id': 'AfrojackVEVO',
1250 'upload_date': '20131011',
cc2db878 1251 'abr': 129.495,
976ae3ea 1252 'like_count': int,
1253 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1254 'playable_in_embed': True,
1255 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1256 'view_count': int,
1257 'track': 'The Spark',
1258 'live_status': 'not_live',
1259 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1260 'channel': 'Afrojack',
1261 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1262 'tags': 'count:19',
1263 'availability': 'public',
1264 'categories': ['Music'],
1265 'age_limit': 0,
1266 'alt_title': 'The Spark',
6c73052c 1267 'channel_follower_count': int
8bdd16b4 1268 },
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141/bestaudio[ext=m4a]',
1272 },
1273 },
65c2fde2 1274 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1275 {
65c2fde2 1276 'note': 'Embed allowed age-gate video',
2d3d2997 1277 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1278 'info_dict': {
1279 'id': 'HtVdAasjOgU',
1280 'ext': 'mp4',
1281 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1282 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1283 'duration': 142,
c522adb1
JMF
1284 'uploader': 'The Witcher',
1285 'uploader_id': 'WitcherGame',
ec85ded8 1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1287 'upload_date': '20140605',
34952f09 1288 'age_limit': 18,
976ae3ea 1289 'categories': ['Gaming'],
1290 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1291 'availability': 'needs_auth',
1292 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1293 'like_count': int,
1294 'channel': 'The Witcher',
1295 'live_status': 'not_live',
1296 'tags': 'count:17',
1297 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1298 'playable_in_embed': True,
1299 'view_count': int,
6c73052c 1300 'channel_follower_count': int
c522adb1
JMF
1301 },
1302 },
65c2fde2 1303 {
1304 'note': 'Age-gate video with embed allowed in public site',
1305 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1306 'info_dict': {
1307 'id': 'HsUATh_Nc2U',
1308 'ext': 'mp4',
1309 'title': 'Godzilla 2 (Official Video)',
1310 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1311 'upload_date': '20200408',
1312 'uploader_id': 'FlyingKitty900',
1313 'uploader': 'FlyingKitty',
1314 'age_limit': 18,
976ae3ea 1315 'availability': 'needs_auth',
1316 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1317 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1318 'channel': 'FlyingKitty',
1319 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1320 'view_count': int,
1321 'categories': ['Entertainment'],
1322 'live_status': 'not_live',
1323 'tags': ['Flyingkitty', 'godzilla 2'],
1324 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1325 'like_count': int,
1326 'duration': 177,
1327 'playable_in_embed': True,
6c73052c 1328 'channel_follower_count': int
65c2fde2 1329 },
1330 },
1331 {
1332 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1333 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1334 'info_dict': {
1335 'id': 'Tq92D6wQ1mg',
1336 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1337 'ext': 'mp4',
17322130 1338 'upload_date': '20191228',
65c2fde2 1339 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1340 'uploader': 'Projekt Melody',
1341 'description': 'md5:17eccca93a786d51bc67646756894066',
1342 'age_limit': 18,
976ae3ea 1343 'like_count': int,
1344 'availability': 'needs_auth',
1345 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1346 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1347 'view_count': int,
1348 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1349 'channel': 'Projekt Melody',
1350 'live_status': 'not_live',
1351 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1352 'playable_in_embed': True,
1353 'categories': ['Entertainment'],
1354 'duration': 106,
1355 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1356 'comment_count': int,
6c73052c 1357 'channel_follower_count': int
65c2fde2 1358 },
1359 },
1360 {
1361 'note': 'Non-Agegated non-embeddable video',
1362 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1363 'info_dict': {
1364 'id': 'MeJVWBSsPAY',
1365 'ext': 'mp4',
1366 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1367 'uploader': 'Herr Lurik',
1368 'uploader_id': 'st3in234',
1369 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1370 'upload_date': '20130730',
976ae3ea 1371 'track': 'Such mich find mich',
1372 'age_limit': 0,
1373 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1374 'like_count': int,
1375 'playable_in_embed': False,
1376 'creator': 'OOMPH!',
1377 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1378 'view_count': int,
1379 'alt_title': 'Such mich find mich',
1380 'duration': 210,
1381 'channel': 'Herr Lurik',
1382 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1383 'categories': ['Music'],
1384 'availability': 'public',
1385 'uploader_url': 'http://www.youtube.com/user/st3in234',
1386 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1387 'live_status': 'not_live',
1388 'artist': 'OOMPH!',
6c73052c 1389 'channel_follower_count': int
65c2fde2 1390 },
1391 },
1392 {
1393 'note': 'Non-bypassable age-gated video',
1394 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1395 'only_matching': True,
1396 },
8bdd16b4 1397 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1398 # YouTube Red ad is not captured for creator
1399 {
1400 'url': '__2ABJjxzNo',
1401 'info_dict': {
1402 'id': '__2ABJjxzNo',
1403 'ext': 'mp4',
1404 'duration': 266,
1405 'upload_date': '20100430',
1406 'uploader_id': 'deadmau5',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1408 'creator': 'deadmau5',
1409 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1410 'uploader': 'deadmau5',
1411 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1412 'alt_title': 'Some Chords',
976ae3ea 1413 'availability': 'public',
1414 'tags': 'count:14',
1415 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1416 'view_count': int,
1417 'live_status': 'not_live',
1418 'channel': 'deadmau5',
1419 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1420 'like_count': int,
1421 'track': 'Some Chords',
1422 'artist': 'deadmau5',
1423 'playable_in_embed': True,
1424 'age_limit': 0,
1425 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1426 'categories': ['Music'],
1427 'album': 'Some Chords',
6c73052c 1428 'channel_follower_count': int
8bdd16b4 1429 },
1430 'expected_warnings': [
1431 'DASH manifest missing',
1432 ]
1433 },
067aa17e 1434 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1435 {
1436 'url': 'lqQg6PlCWgI',
1437 'info_dict': {
1438 'id': 'lqQg6PlCWgI',
1439 'ext': 'mp4',
556dbe7f 1440 'duration': 6085,
90227264 1441 'upload_date': '20150827',
cbe2bd91 1442 'uploader_id': 'olympic',
ec85ded8 1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1444 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1445 'uploader': 'Olympics',
cbe2bd91 1446 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1447 'like_count': int,
1448 'release_timestamp': 1343767800,
1449 'playable_in_embed': True,
1450 'categories': ['Sports'],
1451 'release_date': '20120731',
1452 'channel': 'Olympics',
1453 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1454 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1455 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1456 'age_limit': 0,
1457 'availability': 'public',
1458 'live_status': 'was_live',
1459 'view_count': int,
1460 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1461 'channel_follower_count': int
cbe2bd91
PH
1462 },
1463 'params': {
1464 'skip_download': 'requires avconv',
e52a40ab 1465 }
cbe2bd91 1466 },
6271f1ca
PH
1467 # Non-square pixels
1468 {
1469 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1470 'info_dict': {
1471 'id': '_b-2C3KPAM0',
1472 'ext': 'mp4',
1473 'stretched_ratio': 16 / 9.,
556dbe7f 1474 'duration': 85,
6271f1ca
PH
1475 'upload_date': '20110310',
1476 'uploader_id': 'AllenMeow',
ec85ded8 1477 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1478 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1479 'uploader': '孫ᄋᄅ',
6271f1ca 1480 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1481 'playable_in_embed': True,
1482 'channel': '孫ᄋᄅ',
1483 'age_limit': 0,
1484 'tags': 'count:11',
1485 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1486 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1487 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1488 'view_count': int,
1489 'categories': ['People & Blogs'],
1490 'like_count': int,
1491 'live_status': 'not_live',
1492 'availability': 'unlisted',
12a1b225 1493 'comment_count': int,
6c73052c 1494 'channel_follower_count': int
6271f1ca 1495 },
06b491eb
S
1496 },
1497 # url_encoded_fmt_stream_map is empty string
1498 {
1499 'url': 'qEJwOuvDf7I',
1500 'info_dict': {
1501 'id': 'qEJwOuvDf7I',
f57b7835 1502 'ext': 'webm',
06b491eb
S
1503 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1504 'description': '',
1505 'upload_date': '20150404',
1506 'uploader_id': 'spbelect',
1507 'uploader': 'Наблюдатели Петербурга',
1508 },
1509 'params': {
1510 'skip_download': 'requires avconv',
e323cf3f
S
1511 },
1512 'skip': 'This live event has ended.',
06b491eb 1513 },
067aa17e 1514 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1515 {
1516 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1517 'info_dict': {
1518 'id': 'FIl7x6_3R5Y',
eb6793ba 1519 'ext': 'webm',
da77d856
S
1520 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1521 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1522 'duration': 220,
da77d856
S
1523 'upload_date': '20150625',
1524 'uploader_id': 'dorappi2000',
ec85ded8 1525 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1526 'uploader': 'dorappi2000',
eb6793ba 1527 'formats': 'mincount:31',
da77d856 1528 },
eb6793ba 1529 'skip': 'not actual anymore',
2ee8f5d8 1530 },
8a1a26ce
YCH
1531 # DASH manifest with segment_list
1532 {
1533 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1534 'md5': '8ce563a1d667b599d21064e982ab9e31',
1535 'info_dict': {
1536 'id': 'CsmdDsKjzN8',
1537 'ext': 'mp4',
17ee98e1 1538 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1539 'uploader': 'Airtek',
1540 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1541 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1542 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1543 },
1544 'params': {
1545 'youtube_include_dash_manifest': True,
1546 'format': '135', # bestvideo
be49068d
S
1547 },
1548 'skip': 'This live event has ended.',
2ee8f5d8 1549 },
cf7e015f
S
1550 {
1551 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1552 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1553 'info_dict': {
545cc85d 1554 'id': 'jvGDaLqkpTg',
1555 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1556 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1557 },
1558 'playlist': [{
1559 'info_dict': {
545cc85d 1560 'id': 'jvGDaLqkpTg',
cf7e015f 1561 'ext': 'mp4',
545cc85d 1562 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1563 'description': 'md5:e03b909557865076822aa169218d6a5d',
1564 'duration': 10643,
1565 'upload_date': '20161111',
1566 'uploader': 'Team PGP',
1567 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1569 },
1570 }, {
1571 'info_dict': {
545cc85d 1572 'id': '3AKt1R1aDnw',
cf7e015f 1573 'ext': 'mp4',
545cc85d 1574 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1575 'description': 'md5:e03b909557865076822aa169218d6a5d',
1576 'duration': 10991,
1577 'upload_date': '20161111',
1578 'uploader': 'Team PGP',
1579 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1581 },
1582 }, {
1583 'info_dict': {
545cc85d 1584 'id': 'RtAMM00gpVc',
cf7e015f 1585 'ext': 'mp4',
545cc85d 1586 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1587 'description': 'md5:e03b909557865076822aa169218d6a5d',
1588 'duration': 10995,
1589 'upload_date': '20161111',
1590 'uploader': 'Team PGP',
1591 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1593 },
1594 }, {
1595 'info_dict': {
545cc85d 1596 'id': '6N2fdlP3C5U',
cf7e015f 1597 'ext': 'mp4',
545cc85d 1598 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1599 'description': 'md5:e03b909557865076822aa169218d6a5d',
1600 'duration': 10990,
1601 'upload_date': '20161111',
1602 'uploader': 'Team PGP',
1603 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1605 },
1606 }],
1607 'params': {
1608 'skip_download': True,
1609 },
65c2fde2 1610 'skip': 'Not multifeed anymore',
cbaed4bb 1611 },
f9f49d87 1612 {
067aa17e 1613 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1614 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1615 'info_dict': {
1616 'id': 'gVfLd0zydlo',
1617 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1618 },
1619 'playlist_count': 2,
be49068d 1620 'skip': 'Not multifeed anymore',
f9f49d87 1621 },
cbaed4bb 1622 {
2d3d2997 1623 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1624 'only_matching': True,
0e49d9a6 1625 },
6d4fc66b 1626 {
2d3d2997 1627 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1628 'only_matching': True,
1629 },
0e49d9a6 1630 {
067aa17e 1631 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1632 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1633 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1634 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1635 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1636 'info_dict': {
1637 'id': 'lsguqyKfVQg',
1638 'ext': 'mp4',
1639 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1640 'alt_title': 'Dark Walk',
0e49d9a6 1641 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1642 'duration': 133,
0e49d9a6
LL
1643 'upload_date': '20151119',
1644 'uploader_id': 'IronSoulElf',
ec85ded8 1645 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1646 'uploader': 'IronSoulElf',
11f9be09 1647 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1648 'track': 'Dark Walk',
1649 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1650 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1651 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1652 'categories': ['Film & Animation'],
1653 'view_count': int,
1654 'live_status': 'not_live',
1655 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1656 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1657 'tags': 'count:13',
1658 'availability': 'public',
1659 'channel': 'IronSoulElf',
1660 'playable_in_embed': True,
1661 'like_count': int,
1662 'age_limit': 0,
6c73052c 1663 'channel_follower_count': int
0e49d9a6
LL
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
1668 },
61f92af1 1669 {
067aa17e 1670 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1671 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1672 'only_matching': True,
1673 },
313dfc45
LL
1674 {
1675 # Video with yt:stretch=17:0
1676 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1677 'info_dict': {
1678 'id': 'Q39EVAstoRM',
1679 'ext': 'mp4',
1680 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1681 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1682 'upload_date': '20151107',
1683 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1684 'uploader': 'CH GAMER DROID',
1685 },
1686 'params': {
1687 'skip_download': True,
1688 },
be49068d 1689 'skip': 'This video does not exist.',
313dfc45 1690 },
201c1459 1691 {
1692 # Video with incomplete 'yt:stretch=16:'
1693 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1694 'only_matching': True,
1695 },
7caf9830
S
1696 {
1697 # Video licensed under Creative Commons
1698 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1699 'info_dict': {
1700 'id': 'M4gD1WSo5mA',
1701 'ext': 'mp4',
1702 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1703 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1704 'duration': 721,
17322130 1705 'upload_date': '20150128',
7caf9830 1706 'uploader_id': 'BerkmanCenter',
ec85ded8 1707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1708 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1709 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1710 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1711 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1712 'like_count': int,
1713 'age_limit': 0,
1714 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1715 'channel': 'The Berkman Klein Center for Internet & Society',
1716 'availability': 'public',
1717 'view_count': int,
1718 'categories': ['Education'],
1719 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1720 'live_status': 'not_live',
1721 'playable_in_embed': True,
12a1b225 1722 'comment_count': int,
6c73052c 1723 'channel_follower_count': int
7caf9830
S
1724 },
1725 'params': {
1726 'skip_download': True,
1727 },
1728 },
fd050249
S
1729 {
1730 # Channel-like uploader_url
1731 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1732 'info_dict': {
1733 'id': 'eQcmzGIKrzg',
1734 'ext': 'mp4',
1735 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1736 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1737 'duration': 4060,
17322130 1738 'upload_date': '20151120',
eb6793ba 1739 'uploader': 'Bernie Sanders',
fd050249 1740 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1741 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1742 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1743 'playable_in_embed': True,
1744 'tags': 'count:12',
1745 'like_count': int,
1746 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1747 'age_limit': 0,
1748 'availability': 'public',
1749 'categories': ['News & Politics'],
1750 'channel': 'Bernie Sanders',
1751 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1752 'view_count': int,
1753 'live_status': 'not_live',
1754 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1755 'comment_count': int,
6c73052c 1756 'channel_follower_count': int
fd050249
S
1757 },
1758 'params': {
1759 'skip_download': True,
1760 },
1761 },
040ac686
S
1762 {
1763 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1764 'only_matching': True,
7f29cf54
S
1765 },
1766 {
067aa17e 1767 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1768 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1769 'only_matching': True,
6496ccb4
S
1770 },
1771 {
1772 # Rental video preview
1773 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1774 'info_dict': {
1775 'id': 'uGpuVWrhIzE',
1776 'ext': 'mp4',
1777 'title': 'Piku - Trailer',
1778 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1779 'upload_date': '20150811',
1780 'uploader': 'FlixMatrix',
1781 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1783 'license': 'Standard YouTube License',
1784 },
1785 'params': {
1786 'skip_download': True,
1787 },
eb6793ba 1788 'skip': 'This video is not available.',
022a5d66 1789 },
12afdc2a
S
1790 {
1791 # YouTube Red video with episode data
1792 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1793 'info_dict': {
1794 'id': 'iqKdEhx-dD4',
1795 'ext': 'mp4',
1796 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1797 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1798 'duration': 2085,
12afdc2a
S
1799 'upload_date': '20170118',
1800 'uploader': 'Vsauce',
1801 'uploader_id': 'Vsauce',
1802 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1803 'series': 'Mind Field',
1804 'season_number': 1,
1805 'episode_number': 1,
976ae3ea 1806 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1807 'tags': 'count:12',
1808 'view_count': int,
1809 'availability': 'public',
1810 'age_limit': 0,
1811 'channel': 'Vsauce',
1812 'episode': 'Episode 1',
1813 'categories': ['Entertainment'],
1814 'season': 'Season 1',
1815 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1816 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1817 'like_count': int,
1818 'playable_in_embed': True,
1819 'live_status': 'not_live',
6c73052c 1820 'channel_follower_count': int
12afdc2a
S
1821 },
1822 'params': {
1823 'skip_download': True,
1824 },
1825 'expected_warnings': [
1826 'Skipping DASH manifest',
1827 ],
1828 },
c7121fa7
S
1829 {
1830 # The following content has been identified by the YouTube community
1831 # as inappropriate or offensive to some audiences.
1832 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1833 'info_dict': {
1834 'id': '6SJNVb0GnPI',
1835 'ext': 'mp4',
1836 'title': 'Race Differences in Intelligence',
1837 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1838 'duration': 965,
1839 'upload_date': '20140124',
1840 'uploader': 'New Century Foundation',
1841 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1843 },
1844 'params': {
1845 'skip_download': True,
1846 },
545cc85d 1847 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1848 },
022a5d66
S
1849 {
1850 # itag 212
1851 'url': '1t24XAntNCY',
1852 'only_matching': True,
fd5c4aab
S
1853 },
1854 {
1855 # geo restricted to JP
1856 'url': 'sJL6WA-aGkQ',
1857 'only_matching': True,
1858 },
cd5a74a2
S
1859 {
1860 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1861 'only_matching': True,
1862 },
bc2ca1bb 1863 {
1864 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1865 'only_matching': True,
1866 },
1867 {
1868 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1869 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1870 'only_matching': True,
1871 },
825cd268
RA
1872 {
1873 # DRM protected
1874 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1875 'only_matching': True,
4fe54c12
S
1876 },
1877 {
1878 # Video with unsupported adaptive stream type formats
1879 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1880 'info_dict': {
1881 'id': 'Z4Vy8R84T1U',
1882 'ext': 'mp4',
1883 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1884 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1885 'duration': 433,
1886 'upload_date': '20130923',
1887 'uploader': 'Amelia Putri Harwita',
1888 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1890 'formats': 'maxcount:10',
1891 },
1892 'params': {
1893 'skip_download': True,
1894 'youtube_include_dash_manifest': False,
1895 },
5429d6a9 1896 'skip': 'not actual anymore',
5caabd3c 1897 },
1898 {
822b9d9c 1899 # YouTube Music auto-generated description
5caabd3c 1900 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1901 'info_dict': {
1902 'id': 'MgNrAu2pzNs',
1903 'ext': 'mp4',
1904 'title': 'Voyeur Girl',
1905 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1906 'upload_date': '20190312',
5429d6a9
S
1907 'uploader': 'Stephen - Topic',
1908 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1909 'artist': 'Stephen',
1910 'track': 'Voyeur Girl',
1911 'album': 'it\'s too much love to know my dear',
1912 'release_date': '20190313',
1913 'release_year': 2019,
976ae3ea 1914 'alt_title': 'Voyeur Girl',
1915 'view_count': int,
1916 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1917 'playable_in_embed': True,
1918 'like_count': int,
1919 'categories': ['Music'],
1920 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1921 'channel': 'Stephen',
1922 'availability': 'public',
1923 'creator': 'Stephen',
1924 'duration': 169,
1925 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1926 'age_limit': 0,
1927 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1928 'tags': 'count:11',
1929 'live_status': 'not_live',
6c73052c 1930 'channel_follower_count': int
5caabd3c 1931 },
1932 'params': {
1933 'skip_download': True,
1934 },
1935 },
66b48727
RA
1936 {
1937 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1938 'only_matching': True,
1939 },
011e75e6
S
1940 {
1941 # invalid -> valid video id redirection
1942 'url': 'DJztXj2GPfl',
1943 'info_dict': {
1944 'id': 'DJztXj2GPfk',
1945 'ext': 'mp4',
1946 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1947 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1948 'upload_date': '20090125',
1949 'uploader': 'Prochorowka',
1950 'uploader_id': 'Prochorowka',
1951 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1952 'artist': 'Panjabi MC',
1953 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1954 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1955 },
1956 'params': {
1957 'skip_download': True,
1958 },
545cc85d 1959 'skip': 'Video unavailable',
ea74e00b
DP
1960 },
1961 {
1962 # empty description results in an empty string
1963 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1964 'info_dict': {
1965 'id': 'x41yOUIvK2k',
1966 'ext': 'mp4',
1967 'title': 'IMG 3456',
1968 'description': '',
1969 'upload_date': '20170613',
1970 'uploader_id': 'ElevageOrVert',
1971 'uploader': 'ElevageOrVert',
976ae3ea 1972 'view_count': int,
1973 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1974 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1975 'like_count': int,
1976 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1977 'tags': [],
1978 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1979 'availability': 'public',
1980 'age_limit': 0,
1981 'categories': ['Pets & Animals'],
1982 'duration': 7,
1983 'playable_in_embed': True,
1984 'live_status': 'not_live',
1985 'channel': 'ElevageOrVert',
6c73052c 1986 'channel_follower_count': int
ea74e00b
DP
1987 },
1988 'params': {
1989 'skip_download': True,
1990 },
1991 },
a0566bbf 1992 {
29f7c58a 1993 # with '};' inside yt initial data (see [1])
1994 # see [2] for an example with '};' inside ytInitialPlayerResponse
1995 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1996 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1997 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1998 'info_dict': {
1999 'id': 'CHqg6qOn4no',
2000 'ext': 'mp4',
2001 'title': 'Part 77 Sort a list of simple types in c#',
2002 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2003 'upload_date': '20130831',
2004 'uploader_id': 'kudvenkat',
2005 'uploader': 'kudvenkat',
976ae3ea 2006 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2007 'like_count': int,
2008 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2009 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2010 'live_status': 'not_live',
2011 'categories': ['Education'],
2012 'availability': 'public',
2013 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2014 'tags': 'count:12',
2015 'playable_in_embed': True,
2016 'age_limit': 0,
2017 'view_count': int,
2018 'duration': 522,
2019 'channel': 'kudvenkat',
12a1b225 2020 'comment_count': int,
6c73052c 2021 'channel_follower_count': int
a0566bbf 2022 },
2023 'params': {
2024 'skip_download': True,
2025 },
2026 },
29f7c58a 2027 {
2028 # another example of '};' in ytInitialData
2029 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2030 'only_matching': True,
2031 },
2032 {
2033 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2034 'only_matching': True,
2035 },
545cc85d 2036 {
cc2db878 2037 # https://github.com/ytdl-org/youtube-dl/pull/28094
2038 'url': 'OtqTfy26tG0',
2039 'info_dict': {
2040 'id': 'OtqTfy26tG0',
2041 'ext': 'mp4',
2042 'title': 'Burn Out',
2043 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2044 'upload_date': '20141120',
2045 'uploader': 'The Cinematic Orchestra - Topic',
2046 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2047 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2048 'artist': 'The Cinematic Orchestra',
2049 'track': 'Burn Out',
2050 'album': 'Every Day',
976ae3ea 2051 'like_count': int,
2052 'live_status': 'not_live',
2053 'alt_title': 'Burn Out',
2054 'duration': 614,
2055 'age_limit': 0,
2056 'view_count': int,
2057 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2058 'creator': 'The Cinematic Orchestra',
2059 'channel': 'The Cinematic Orchestra',
2060 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2061 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2062 'availability': 'public',
2063 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2064 'categories': ['Music'],
2065 'playable_in_embed': True,
6c73052c 2066 'channel_follower_count': int
cc2db878 2067 },
2068 'params': {
2069 'skip_download': True,
2070 },
545cc85d 2071 },
bc2ca1bb 2072 {
2073 # controversial video, only works with bpctr when authenticated with cookies
2074 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2075 'only_matching': True,
2076 },
a1a7907b 2077 {
2078 # controversial video, requires bpctr/contentCheckOk
2079 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2080 'info_dict': {
2081 'id': 'SZJvDhaSDnc',
2082 'ext': 'mp4',
2083 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2084 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2085 'uploader': 'CBS Mornings',
11f9be09 2086 'uploader_id': 'CBSThisMorning',
a1a7907b 2087 'upload_date': '20140716',
976ae3ea 2088 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2089 'duration': 170,
2090 'categories': ['News & Politics'],
2091 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2092 'view_count': int,
2093 'channel': 'CBS Mornings',
2094 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2095 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2096 'age_limit': 18,
2097 'availability': 'needs_auth',
2098 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2099 'like_count': int,
2100 'live_status': 'not_live',
2101 'playable_in_embed': True,
6c73052c 2102 'channel_follower_count': int
a1a7907b 2103 }
2104 },
f7ad7160 2105 {
2106 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2107 'url': 'cBvYw8_A0vQ',
2108 'info_dict': {
2109 'id': 'cBvYw8_A0vQ',
2110 'ext': 'mp4',
2111 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2112 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2113 'upload_date': '20201120',
2114 'uploader': 'Walk around Japan',
2115 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2116 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2117 'duration': 1456,
2118 'categories': ['Travel & Events'],
2119 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2120 'view_count': int,
2121 'channel': 'Walk around Japan',
2122 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2123 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2124 'age_limit': 0,
2125 'availability': 'public',
2126 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2127 'live_status': 'not_live',
2128 'playable_in_embed': True,
6c73052c 2129 'channel_follower_count': int
f7ad7160 2130 },
2131 'params': {
2132 'skip_download': True,
2133 },
0fb983f6 2134 }, {
2135 # Has multiple audio streams
2136 'url': 'WaOKSUlf4TM',
2137 'only_matching': True
9297939e 2138 }, {
2139 # Requires Premium: has format 141 when requested using YTM url
2140 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2141 'only_matching': True
2142 }, {
120916da 2143 # multiple subtitles with same lang_code
2144 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2145 'only_matching': True,
109dd3b2 2146 }, {
2147 # Force use android client fallback
2148 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2149 'info_dict': {
2150 'id': 'YOelRv7fMxY',
11f9be09 2151 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2152 'ext': '3gp',
2153 'upload_date': '20210624',
2154 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2155 'uploader': 'colinfurze',
11f9be09 2156 'uploader_id': 'colinfurze',
109dd3b2 2157 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2158 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2159 'duration': 596,
2160 'categories': ['Entertainment'],
2161 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2162 'view_count': int,
2163 'channel': 'colinfurze',
2164 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2165 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2166 'age_limit': 0,
2167 'availability': 'public',
2168 'like_count': int,
2169 'live_status': 'not_live',
2170 'playable_in_embed': True,
6c73052c 2171 'channel_follower_count': int
109dd3b2 2172 },
2173 'params': {
2174 'format': '17', # 3gp format available on android
2175 'extractor_args': {'youtube': {'player_client': ['android']}},
2176 },
120916da 2177 },
109dd3b2 2178 {
2179 # Skip download of additional client configs (remix client config in this case)
2180 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2181 'only_matching': True,
2182 'params': {
2183 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2184 },
8fc54b12 2185 }, {
2186 # shorts
2187 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2188 'only_matching': True,
9222c381 2189 }, {
2190 'note': 'Storyboards',
2191 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2192 'info_dict': {
2193 'id': '5KLPxDtMqe8',
2194 'ext': 'mhtml',
2195 'format_id': 'sb0',
2196 'title': 'Your Brain is Plastic',
2197 'uploader_id': 'scishow',
2198 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2199 'upload_date': '20140324',
2200 'uploader': 'SciShow',
976ae3ea 2201 'like_count': int,
2202 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2203 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2204 'view_count': int,
2205 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2206 'playable_in_embed': True,
2207 'tags': 'count:12',
2208 'uploader_url': 'http://www.youtube.com/user/scishow',
2209 'availability': 'public',
2210 'channel': 'SciShow',
2211 'live_status': 'not_live',
2212 'duration': 248,
2213 'categories': ['Education'],
2214 'age_limit': 0,
6c73052c 2215 'channel_follower_count': int
9222c381 2216 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2217 }, {
2218 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2219 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2220 'info_dict': {
2221 'id': '2NUZ8W2llS4',
2222 'ext': 'mp4',
2223 'title': 'The NP that test your phone performance 🙂',
2224 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2225 'uploader': 'Leon Nguyen',
2226 'uploader_id': 'VNSXIII',
2227 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2228 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2229 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2230 'duration': 21,
2231 'view_count': int,
2232 'age_limit': 0,
2233 'categories': ['Gaming'],
2234 'tags': 'count:23',
2235 'playable_in_embed': True,
2236 'live_status': 'not_live',
2237 'upload_date': '20220103',
2238 'like_count': int,
2239 'availability': 'public',
2240 'channel': 'Leon Nguyen',
2241 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2242 'comment_count': int,
992f9a73 2243 'channel_follower_count': int
2244 }
1ff88b7a 2245 }, {
2246 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2247 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2248 'info_dict': {
2249 'id': '2NUZ8W2llS4',
2250 'ext': 'mp4',
2251 'title': 'The NP that test your phone performance 🙂',
2252 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2253 'uploader': 'Leon Nguyen',
2254 'uploader_id': 'VNSXIII',
2255 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2256 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2257 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2258 'duration': 21,
2259 'view_count': int,
2260 'age_limit': 0,
2261 'categories': ['Gaming'],
2262 'tags': 'count:23',
2263 'playable_in_embed': True,
2264 'live_status': 'not_live',
2265 'upload_date': '20220102',
2266 'like_count': int,
2267 'availability': 'public',
2268 'channel': 'Leon Nguyen',
2269 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2270 'comment_count': int,
2271 'channel_follower_count': int
2272 },
2273 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2274 }, {
2275 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2276 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2277 'info_dict': {
2278 'id': 'mzZzzBU6lrM',
2279 'ext': 'mp4',
2280 'title': 'I Met GeorgeNotFound In Real Life...',
2281 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2282 'uploader': 'Quackity',
2283 'uploader_id': 'QuackityHQ',
2284 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2285 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2286 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2287 'duration': 955,
2288 'view_count': int,
2289 'age_limit': 0,
2290 'categories': ['Entertainment'],
2291 'tags': 'count:26',
2292 'playable_in_embed': True,
2293 'live_status': 'not_live',
2294 'release_timestamp': 1641172509,
2295 'release_date': '20220103',
2296 'upload_date': '20220103',
2297 'like_count': int,
2298 'availability': 'public',
2299 'channel': 'Quackity',
2300 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2301 'channel_follower_count': int
2302 }
2303 },
2304 { # continuous livestream. Microformat upload date should be preferred.
2305 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2306 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2307 'info_dict': {
2308 'id': 'kgx4WGK0oNU',
2309 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2310 'ext': 'mp4',
2311 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2312 'availability': 'public',
2313 'age_limit': 0,
2314 'release_timestamp': 1637975704,
2315 'upload_date': '20210619',
2316 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2317 'live_status': 'is_live',
2318 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2319 'uploader': '阿鲍Abao',
2320 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2321 'channel': 'Abao in Tokyo',
2322 'channel_follower_count': int,
2323 'release_date': '20211127',
2324 'tags': 'count:39',
2325 'categories': ['People & Blogs'],
2326 'like_count': int,
2327 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2328 'view_count': int,
2329 'playable_in_embed': True,
2330 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2331 },
2332 'params': {'skip_download': True}
6e634cbe 2333 }, {
2334 # Story. Requires specific player params to work.
ee27297f 2335 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2336 'info_dict': {
ee27297f 2337 'id': 'vv8qTUWmulI',
6e634cbe 2338 'ext': 'mp4',
ee27297f 2339 'availability': 'unlisted',
2340 'view_count': int,
2341 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2342 'upload_date': '20220526',
2343 'categories': ['Education'],
2344 'title': 'Story',
2345 'channel': 'IT\'S HISTORY',
2346 'description': '',
2347 'uploader_id': 'BlastfromthePast',
2348 'duration': 12,
2349 'uploader': 'IT\'S HISTORY',
6e634cbe 2350 'playable_in_embed': True,
6e634cbe 2351 'age_limit': 0,
6e634cbe 2352 'live_status': 'not_live',
ee27297f 2353 'tags': [],
2354 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2355 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2356 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2357 },
2358 'skip': 'stories get removed after some period of time',
ee27297f 2359 }, {
2360 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2361 'info_dict': {
2362 'id': 'tjjjtzRLHvA',
2363 'ext': 'mp4',
2364 'title': 'ハッシュタグ無し };if window.ytcsi',
2365 'upload_date': '20220323',
2366 'like_count': int,
2367 'availability': 'unlisted',
2368 'channel': 'nao20010128nao',
2369 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2370 'age_limit': 0,
2371 'uploader': 'nao20010128nao',
2372 'uploader_id': 'nao20010128nao',
2373 'categories': ['Music'],
6e634cbe 2374 'view_count': int,
2375 'description': '',
ee27297f 2376 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2377 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2378 'live_status': 'not_live',
2379 'playable_in_embed': True,
2380 'channel_follower_count': int,
2381 'duration': 6,
2382 'tags': [],
2383 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2384 }
c26f9b99 2385 }, {
2386 # Prefer primary title+description language metadata by default
2387 # Do not prefer translated description if primary is empty
2388 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2389 'info_dict': {
2390 'id': 'el3E4MbxRqQ',
2391 'ext': 'mp4',
2392 'title': 'dlp test video 2 - primary sv no desc',
2393 'description': '',
2394 'channel': 'cole-dlp-test-acc',
2395 'tags': [],
2396 'view_count': int,
2397 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2398 'like_count': int,
2399 'playable_in_embed': True,
2400 'availability': 'unlisted',
2401 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2402 'age_limit': 0,
2403 'duration': 5,
2404 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2405 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2406 'live_status': 'not_live',
2407 'upload_date': '20220908',
2408 'categories': ['People & Blogs'],
2409 'uploader': 'cole-dlp-test-acc',
2410 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2411 },
2412 'params': {'skip_download': True}
2413 }, {
2414 # Extractor argument: prefer translated title+description
2415 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2416 'info_dict': {
2417 'id': 'gHKT4uU8Zng',
2418 'ext': 'mp4',
2419 'channel': 'cole-dlp-test-acc',
2420 'tags': [],
2421 'duration': 5,
2422 'live_status': 'not_live',
2423 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2424 'upload_date': '20220728',
2425 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2426 'view_count': int,
2427 'categories': ['People & Blogs'],
2428 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2429 'title': 'dlp test video title translated (fr)',
2430 'availability': 'public',
2431 'uploader': 'cole-dlp-test-acc',
2432 'age_limit': 0,
2433 'description': 'dlp test video description translated (fr)',
2434 'playable_in_embed': True,
2435 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2436 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2437 },
2438 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2439 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2440 }, {
2441 'note': '6 channel audio',
2442 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2443 'only_matching': True,
6e634cbe 2444 }
2eb88d95
PH
2445 ]
2446
f2e8dbcc 2447 _WEBPAGE_TESTS = [
2448 # YouTube <object> embed
2449 {
2450 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2451 'md5': '873c81d308b979f0e23ee7e620b312a3',
2452 'info_dict': {
2453 'id': 'msN87y-iEx0',
2454 'ext': 'mp4',
2455 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2456 'upload_date': '20080526',
2457 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2458 'uploader': 'Christopher Sykes',
2459 'uploader_id': 'ChristopherJSykes',
2460 'age_limit': 0,
2461 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2462 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2463 'playable_in_embed': True,
2464 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2465 'like_count': int,
2466 'comment_count': int,
2467 'channel': 'Christopher Sykes',
2468 'live_status': 'not_live',
2469 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2470 'availability': 'public',
2471 'duration': 195,
2472 'view_count': int,
2473 'categories': ['Science & Technology'],
2474 'channel_follower_count': int,
2475 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2476 },
2477 'params': {
2478 'skip_download': True,
2479 }
2480 },
2481 ]
2482
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs carrying a non-empty ``list`` query parameter are rejected;
    everything else is decided by the parent class.
    """
    # NOTE(review): `parse_qs` is imported locally although the module also
    # imports it at the top - presumably this method has to stay
    # self-contained; confirm before removing the local import.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
201c1459 2491
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance player caches."""
    super().__init__(*args, **kwargs)
    # Results memoised by `_cached`, keyed by the cache-id tuple
    self._player_cache = {}
    # Downloaded player JS, keyed by player id (filled by `_load_player`)
    self._code_cache = {}
e0df6211 2496
4d37720a 2497 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
adbc4ec4 2498 lock = threading.Lock()
185bf310 2499 start_time = time.time()
adbc4ec4
THD
2500 formats = [f for f in formats if f.get('is_from_start')]
2501
185bf310 2502 def refetch_manifest(format_id, delay):
2503 nonlocal formats, start_time, is_live
2504 if time.time() <= start_time + delay:
adbc4ec4
THD
2505 return
2506
2507 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2508 video_details = traverse_obj(
2509 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2510 microformats = traverse_obj(
2511 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2512 expected_type=dict, default=[])
4d37720a
L
2513 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2514 is_live = live_status == 'is_live'
185bf310 2515 start_time = time.time()
adbc4ec4 2516
185bf310 2517 def mpd_feed(format_id, delay):
adbc4ec4
THD
2518 """
2519 @returns (manifest_url, manifest_stream_number, is_live) or None
2520 """
2521 with lock:
185bf310 2522 refetch_manifest(format_id, delay)
adbc4ec4
THD
2523
2524 f = next((f for f in formats if f['format_id'] == format_id), None)
2525 if not f:
185bf310 2526 if not is_live:
2527 self.to_screen(f'{video_id}: Video is no longer live')
2528 else:
2529 self.report_warning(
2530 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2531 return None
2532 return f['manifest_url'], f['manifest_stream_number'], is_live
2533
2534 for f in formats:
4d37720a
L
2535 f['is_live'] = is_live
2536 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
2537 live_start_time, mpd_feed, not is_live and f.copy())
2538 if is_live:
2539 f['fragments'] = gen
2540 f['protocol'] = 'http_dash_segments_generator'
2541 else:
2542 f['fragments'] = LazyList(gen({}))
2543 del f['is_from_start']
adbc4ec4 2544
4d37720a 2545 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
adbc4ec4
THD
2546 FETCH_SPAN, MAX_DURATION = 5, 432000
2547
2548 mpd_url, stream_number, is_live = None, None, True
2549
2550 begin_index = 0
2551 download_start_time = ctx.get('start') or time.time()
2552
2553 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2554 if lack_early_segments:
2555 self.report_warning(bug_reports_message(
2556 'Starting download from the last 120 hours of the live stream since '
2557 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2558 lack_early_segments = True
2559
2560 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2561 fragments, fragment_base_url = None, None
2562
a539f065 2563 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2564 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2565 # Obtain from MPD's maximum seq value
2566 old_mpd_url = mpd_url
185bf310 2567 last_error = ctx.pop('last_error', None)
14f25df2 2568 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2569 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2570 or (mpd_url, stream_number, False))
2571 if not refresh_sequence:
2572 if expire_fast and not is_live:
2573 return False, last_seq
2574 elif old_mpd_url == mpd_url:
2575 return True, last_seq
4d37720a
L
2576 if manifestless_orig_fmt:
2577 fmt_info = manifestless_orig_fmt
2578 else:
2579 try:
2580 fmts, _ = self._extract_mpd_formats_and_subtitles(
2581 mpd_url, None, note=False, errnote=False, fatal=False)
2582 except ExtractorError:
2583 fmts = None
2584 if not fmts:
2585 no_fragment_score += 2
2586 return False, last_seq
2587 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
adbc4ec4
THD
2588 fragments = fmt_info['fragments']
2589 fragment_base_url = fmt_info['fragment_base_url']
2590 assert fragment_base_url
2591
2592 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2593 return True, _last_seq
2594
4d37720a 2595 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
adbc4ec4
THD
2596 while is_live:
2597 fetch_time = time.time()
2598 if no_fragment_score > 30:
2599 return
2600 if last_segment_url:
2601 # Obtain from "X-Head-Seqnum" header value from each segment
2602 try:
2603 urlh = self._request_webpage(
2604 last_segment_url, None, note=False, errnote=False, fatal=False)
2605 except ExtractorError:
2606 urlh = None
2607 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2608 if last_seq is None:
a539f065 2609 no_fragment_score += 2
adbc4ec4
THD
2610 last_segment_url = None
2611 continue
2612 else:
a539f065
LNO
2613 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2614 no_fragment_score += 2
185bf310 2615 if not should_continue:
adbc4ec4
THD
2616 continue
2617
2618 if known_idx > last_seq:
2619 last_segment_url = None
2620 continue
2621
2622 last_seq += 1
2623
2624 if begin_index < 0 and known_idx < 0:
2625 # skip from the start when it's negative value
2626 known_idx = last_seq + begin_index
2627 if lack_early_segments:
2628 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2629 try:
2630 for idx in range(known_idx, last_seq):
2631 # do not update sequence here or you'll get skipped some part of it
a539f065 2632 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2633 if not should_continue:
adbc4ec4
THD
2634 known_idx = idx - 1
2635 raise ExtractorError('breaking out of outer loop')
2636 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2637 yield {
2638 'url': last_segment_url,
36195c44 2639 'fragment_count': last_seq,
adbc4ec4
THD
2640 }
2641 if known_idx == last_seq:
2642 no_fragment_score += 5
2643 else:
2644 no_fragment_score = 0
2645 known_idx = last_seq
2646 except ExtractorError:
2647 continue
2648
4d37720a
L
2649 if manifestless_orig_fmt:
2650 # Stop at the first iteration if running for post-live manifestless;
2651 # fragment count no longer increase since it starts
2652 break
2653
adbc4ec4
THD
2654 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2655
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    `webpage` is accepted but not used by this implementation.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    if relative_url:
        return urljoin('https://www.youtube.com', relative_url)
    return None
109dd3b2 2663
b6de707d 2664 def _download_player_url(self, video_id, fatal=False):
2665 res = self._download_webpage(
2666 'https://www.youtube.com/iframe_api',
2667 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2668 if res:
2669 player_version = self._search_regex(
2670 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2671 if player_version:
2672 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2673
60064c53
PH
2674 def _signature_cache_id(self, example_sig):
2675 """ Return a string representation of a signature """
14f25df2 2676 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2677
e40c758c
S
2678 @classmethod
2679 def _extract_player_info(cls, player_url):
2680 for player_re in cls._PLAYER_INFO_RE:
2681 id_m = re.search(player_re, player_url)
2682 if id_m:
2683 break
2684 else:
c081b35c 2685 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2686 return id_m.group('id')
e40c758c 2687
404f611f 2688 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2689 player_id = self._extract_player_info(player_url)
2690 if player_id not in self._code_cache:
1276a43a 2691 code = self._download_webpage(
109dd3b2 2692 player_url, video_id, fatal=fatal,
2693 note='Downloading player ' + player_id,
2694 errnote='Download of %s failed' % player_url)
1276a43a 2695 if code:
2696 self._code_cache[player_id] = code
404f611f 2697 return self._code_cache.get(player_id)
109dd3b2 2698
e40c758c 2699 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2700 player_id = self._extract_player_info(player_url)
e0df6211 2701
c4417ddb 2702 # Read from filesystem cache
86e5f3ed 2703 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2704 assert os.path.basename(func_id) == func_id
a0e07d31 2705
ae61d108 2706 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2707 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2708
580ce007 2709 if not cache_spec:
2710 code = self._load_player(video_id, player_url)
404f611f 2711 if code:
109dd3b2 2712 res = self._parse_sig_js(code)
ac668111 2713 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2714 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2715 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2716
2717 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2718
60064c53 2719 def _print_sig_code(self, func, example_sig):
404f611f 2720 if not self.get_param('youtube_print_sig_code'):
2721 return
2722
edf3e38e
PH
2723 def gen_sig_code(idxs):
2724 def _genslice(start, end, step):
78caa52a 2725 starts = '' if start == 0 else str(start)
8bcc8756 2726 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2727 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2728 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2729
2730 step = None
7af808a5
PH
2731 # Quelch pyflakes warnings - start will be set when step is set
2732 start = '(Never used)'
edf3e38e
PH
2733 for i, prev in zip(idxs[1:], idxs[:-1]):
2734 if step is not None:
2735 if i - prev == step:
2736 continue
2737 yield _genslice(start, prev, step)
2738 step = None
2739 continue
2740 if i - prev in [-1, 1]:
2741 step = i - prev
2742 start = prev
2743 continue
2744 else:
78caa52a 2745 yield 's[%d]' % prev
edf3e38e 2746 if step is None:
78caa52a 2747 yield 's[%d]' % i
edf3e38e
PH
2748 else:
2749 yield _genslice(start, i, step)
2750
ac668111 2751 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2752 cache_res = func(test_string)
edf3e38e 2753 cache_spec = [ord(c) for c in cache_res]
78caa52a 2754 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2755 signature_id_tuple = '(%s)' % (
14f25df2 2756 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2757 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2758 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2759 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2760
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper for it.

    The wrapper takes the signature string and passes it to the interpreted
    JS function as its single argument.
    """
    current_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
    )
    # Obsolete patterns, tried after the current ones.
    # NOTE(review): the last two entries are identical; kept as found.
    obsolete_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        current_patterns + obsolete_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
2784
580ce007 2785 def _cached(self, func, *cache_id):
2786 def inner(*args, **kwargs):
2787 if cache_id not in self._player_cache:
2788 try:
2789 self._player_cache[cache_id] = func(*args, **kwargs)
2790 except ExtractorError as e:
2791 self._player_cache[cache_id] = e
2792 except Exception as e:
2793 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2794
2795 ret = self._player_cache[cache_id]
2796 if isinstance(ret, Exception):
2797 raise ret
2798 return ret
2799 return inner
2800
545cc85d 2801 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2802 """Turn the encrypted s field into a working signature"""
580ce007 2803 extract_sig = self._cached(
2804 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2805 func = extract_sig(video_id, player_url, s)
2806 self._print_sig_code(func, s)
2807 return func(s)
404f611f 2808
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into its decrypted value.

    The n function is run with the native JS interpreter first; if that raises
    a JSInterpreter error and a PhantomJS wrapper can be created, the function
    is executed with PhantomJS instead. Raises ExtractorError when
    *player_url* is None or the function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_extractor = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = native_extractor(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed; fall back to PhantomJS if it is usable
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2842
90a1df30 2843 def _extract_n_function_name(self, jscode):
2844 funcname, idx = self._search_regex(
2845 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2846 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2847 if not idx:
2848 return funcname
2849
2850 return json.loads(js_to_json(self._search_regex(
2851 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2852 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2853
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    *func_code* is taken from the 'youtube-nsig' cache when present. Otherwise
    the player JS is loaded, the function is extracted (by regex first, then
    through the JS interpreter) and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which would
    # otherwise act as an anchor and make this pattern unable to match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2879
2880 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2881 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2882
580ce007 2883 def extract_nsig(s):
25836db6 2884 try:
2885 ret = func([s])
2886 except JSInterpreter.Exception:
2887 raise
2888 except Exception as e:
2889 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2890
f6ca640b 2891 if ret.startswith('enhanced_except_'):
25836db6 2892 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2893 return ret
580ce007 2894
2895 return extract_nsig
e0df6211 2896
109dd3b2 2897 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2898 """
2899 Extract signatureTimestamp (sts)
2900 Required to tell API what sig/player version is in use.
2901 """
2902 sts = None
2903 if isinstance(ytcfg, dict):
2904 sts = int_or_none(ytcfg.get('STS'))
2905
2906 if not sts:
2907 # Attempt to extract from player
2908 if player_url is None:
2909 error_msg = 'Cannot extract signature timestamp without player_url.'
2910 if fatal:
2911 raise ExtractorError(error_msg)
2912 self.report_warning(error_msg)
2913 return
404f611f 2914 code = self._load_player(video_id, player_url, fatal=fatal)
2915 if code:
109dd3b2 2916 sts = int_or_none(self._search_regex(
2917 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2918 'JS player signature timestamp', group='sts', fatal=fatal))
2919 return sts
2920
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs found in the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # The index doubles as a flag: 0 for the playback URL, 1 for the watch-time URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = qs['et'] = video_length

        query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2961
bfd973ec 2962 @classmethod
2963 def _extract_from_webpage(cls, url, webpage):
2964 # Invidious Instances
2965 # https://github.com/yt-dlp/yt-dlp/issues/195
2966 # https://github.com/iv-org/invidious/pull/1730
2967 mobj = re.search(
2968 r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
2969 webpage)
2970 if mobj:
2971 yield cls.url_result(mobj.group('url'), cls)
2972 raise cls.StopExtraction()
2973
2974 yield from super()._extract_from_webpage(url, webpage)
66c9fa36
S
2975
2976 # lazyYT YouTube embed
bfd973ec 2977 for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
2978 yield cls.url_result(unescapeHTML(id_), cls, id_)
66c9fa36
S
2979
2980 # Wordpress "YouTube Video Importer" plugin
bfd973ec 2981 for m in re.findall(r'''(?x)<div[^>]+
2982 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2983 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
2984 yield cls.url_result(m[-1], cls, m[-1])
66c9fa36 2985
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for *url*; raise ExtractorError when none can be determined."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2992
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')

    def start_seconds(chapter):
        # The start offset is given in milliseconds
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
3007
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-marker lists of the engagement panels.

    Returns the chapters of the first list that produces any, else an empty list.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3020
1890fc63 3021 def _extract_chapters_from_description(self, description, duration):
3022 return self._extract_chapters(
3023 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
3024 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
3025 duration=duration, strict=False)
84213ea8 3026
1890fc63 3027 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3028 if not duration:
3029 return
3030 chapter_list = [{
3031 'start_time': chapter_time(chapter),
3032 'title': chapter_title(chapter),
3033 } for chapter in chapter_list or []]
3034 if not strict:
3035 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3036
a3976e07 3037 chapters = [{'start_time': 0}]
1890fc63 3038 for idx, chapter in enumerate(chapter_list):
a3976e07 3039 if chapter['start_time'] is None:
1890fc63 3040 self.report_warning(f'Incomplete chapter {idx}')
3041 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 3042 chapters.append(chapter)
709ee214 3043 elif chapter not in chapters:
3044 self.report_warning(
3045 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
a3976e07 3046 return chapters[1:]
84213ea8 3047
a1c5d2ca
M
3048 def _extract_comment(self, comment_renderer, parent=None):
3049 comment_id = comment_renderer.get('commentId')
3050 if not comment_id:
3051 return
fe93e2c4 3052
052e1350 3053 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 3054
c26f9b99 3055 # Timestamp is an estimate calculated from the current time and time_text
3056 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3057 timestamp = self._parse_time_text(time_text)
3058
052e1350 3059 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 3060 author_id = try_get(comment_renderer,
14f25df2 3061 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 3062
49bd8c66 3063 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 3064 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 3065 author_thumbnail = try_get(comment_renderer,
14f25df2 3066 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
3067
3068 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 3069 is_favorited = 'creatorHeart' in (try_get(
3070 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
3071 return {
3072 'id': comment_id,
3073 'text': text,
d92f5d5a 3074 'timestamp': timestamp,
a1c5d2ca
M
3075 'time_text': time_text,
3076 'like_count': votes,
97524332 3077 'is_favorited': is_favorited,
a1c5d2ca
M
3078 'author': author,
3079 'author_id': author_id,
3080 'author_thumbnail': author_thumbnail,
3081 'author_is_uploader': author_is_uploader,
3082 'parent': parent or 'root'
3083 }
3084
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    @param root_continuation_data  Renderer from which the first continuation is extracted
    @param ytcfg                   Passed on to the API header generation and requests
    @param video_id                Used to build a continuation when none is found, and in warnings
    @param parent                  Id of the parent comment when fetching replies; None for the top level
    @param tracker                 Counter dict shared across the recursive calls; created on first call

    Replies are fetched by calling this method recursively from extract_thread.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the comments header: records the estimated comment count in the tracker
        # and returns the continuation of the requested sort order (or None)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its (limited) replies.
        # A bare `yield` (None) tells the caller that the parent limit was reached
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-integer limits fall back to sys.maxsize, i.e. effectively unlimited.
    # max_comments itself is not used below; _get_comments applies it to the whole generator
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the very first request of a forced continuation skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is treated as the header: it only supplies the sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None sentinel from extract_thread: stop the whole extraction
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface a message renderer from the root data (if any) when nothing was extracted at the top level
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3230
3231 @staticmethod
3232 def _generate_comment_continuation(video_id):
3233 """
3234 Generates initial comment section continuation token from given video id
3235 """
3236 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3237 return base64.b64encode(token.encode()).decode()
3238
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction

    Returns a lazy iterator over the comments of the item section identified
    as 'comment-item-section', truncated to the first value of the
    max_comments extractor argument (no truncation if that is not an integer).
    """
    def iter_comments():
        # Locate the comment section renderer only once iteration actually starts
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)
a1c5d2ca 3249
109dd3b2 3250 @staticmethod
99e9e001 3251 def _get_checkok_params():
3252 return {'contentCheckOk': True, 'racyCheckOk': True}
3253
3254 @classmethod
3255 def _generate_player_context(cls, sts=None):
109dd3b2 3256 context = {
3257 'html5Preference': 'HTML5_PREF_WANTS',
3258 }
3259 if sts is not None:
3260 context['signatureTimestamp'] = sts
3261 return {
3262 'playbackContext': {
3263 'contentPlaybackContext': context
a1a7907b 3264 },
99e9e001 3265 **cls._get_checkok_params()
109dd3b2 3266 }
3267
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    True when ``playabilityStatus`` has a truthy ``desktopLegacyAgeGateReason``,
    or when its ``status`` or ``reason`` contains one of the known age-gate
    markers (compared case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status values appear to be upper-case
    # (cf. the 'UNPLAYABLE' comparison in _is_unplayable), so normalise before matching
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3279
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3283
# Opaque value sent as `params` in the player API query (for story videos and the
# android client) and as the `pp` webpage query parameter for story videos
_STORY_PLAYER_PARAMS = '8AEB'
3285
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Download the player API response of ``video_id`` for a single client.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns the response, or None if the response is falsy. The request is
    made with ``fatal=True``.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    # Story videos and the android client get the story player params
    wants_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if wants_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
3307
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the player_client extractor argument into an ordered client list.

    Accepted values are the names in INNERTUBE_CLIENTS that do not start with
    an underscore, plus 'default' and 'all'; anything else is skipped with a
    warning. With no valid request, the default clients are used. For music
    URLs, the existing ``<client>_music`` variants of the chosen clients are
    appended. Duplicates are removed, keeping the first occurrence.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialise the additions first: extending a list from a generator
        # that iterates over that same list mutates it during iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3331
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for the requested clients.

    @param clients        Client names, tried in the given order
    @param video_id       Id of the video; responses for another video id are skipped
    @param webpage        Watch page HTML (may be None); source of the initial player response
    @param master_ytcfg   ytcfg used for the web client and as fallback for the others
    @param smuggled_data  Passed through to _extract_player_response
    @returns              (list of player responses, player_url)
    @raises               The last ExtractorError if no player response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() yields them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback player URL download is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the response embedded in the webpage is reused when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # An error is fatal only if no client produced a usable response
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3413
4d37720a
L
3414 def _needs_live_processing(self, live_status, duration):
3415 if (live_status == 'is_live' and self.get_param('live_from_start')
3416 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3417 return live_status
3418
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generator yielding format dicts, followed by one final subtitles dict.

    @param streaming_data  List of streamingData dicts, one per player response
    @param video_id        Id of the video (used for requests and warnings)
    @param player_url      Player URL used for signature/nsig decryption (may be None)
    @param live_status     Live status string; controls which manifests are skipped
    @param duration        Video duration, used to detect truncated formats and for live processing

    Direct (https) formats are yielded first, then HLS and DASH manifest formats.
    The last item yielded is always the merged subtitles dict, so callers must
    split it off (see the star-unpacking in _list_formats).
    """
    # itags: itag (or 'itag-proto') -> protocol that already provided it
    itags, stream_ids = {}, []
    # Qualities seen for the direct formats, looked up later for the manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track, so both make up the identity
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: the URL and the encrypted signature come from signatureCipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, but marked and deprioritized below
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for a "descriptive" track, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # NOTE(review): presumably duration is in seconds, making the threshold
        # "less than half of the video duration" - confirm against the caller
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        # NOTE(review): `quality` can still be None here (no 'quality' and no 'audioQuality'),
        # in which case quality.replace() below would raise if 'qualityLabel' is missing - verify
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        # Assigns format_id and quality to a manifest format.
        # Returns False if this itag was already provided by the same protocol
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the direct format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Always the last item: the merged subtitles of all manifests
    yield subtitles
11f9be09 3622
def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one 'mhtml' storyboard format per storyboard level.

    The storyboard spec is a '|'-separated string: its first part is the URL
    template, each following part is a '#'-separated level description.
    Malformed levels are skipped with a warning. Nothing is yielded when no
    valid base URL can be derived from the spec.
    """
    template, *levels = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')
    # Levels are processed in reverse order of their appearance in the spec
    levels.reverse()

    base_url = url_or_none(urljoin('https://i.ytimg.com/', template or None))
    if not base_url:
        return

    top_level = len(levels) - 1
    for idx, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {idx}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        url = base_url.replace('$L', str(top_level - idx)).replace('$N', name) + f'&sigh={sigh}'
        # Each fragment is a sheet holding cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last sheet may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{idx}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
3660
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    player_skip extractor argument; the webpage is then None.

    Returns a tuple (webpage, master_ytcfg, player_responses, player_url).
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3677
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract the formats and subtitles.

    Returns a tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    where live_status is one of 'post_live', 'is_live', 'is_upcoming',
    'was_live', 'not_live', or None when it cannot be determined.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')

    # Fill in the flags that can be inferred from the other ones
    if live_content is False or (is_live is None and is_upcoming):
        is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False

    if get_first(video_details, 'isPostLiveDvr'):
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif None in (is_live, is_upcoming, live_content):
        # Not enough information to tell
        live_status = None
    elif live_content:
        live_status = 'was_live'
    else:
        live_status = 'not_live'

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all formats first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
3700
def _real_extract(self, url):
    """Extract metadata, formats and subtitles for a single video URL.

    Returns one of:
      * a ``url_result`` pointing at the trailer, when the player response
        only offers a trailer video id;
      * a ``playlist_result`` with one ``url_transparent`` entry per camera
        feed, for multifeed videos (unless ``noplaylist`` is set or the URL
        was smuggled with ``force_singlefeed``);
      * otherwise the info dict of the video itself.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search; always answer None
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None when only a subreason is provided, so do not
            # concatenate with `+=`; join whichever parts are present instead
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails actually advertised by the site, taken before
    # the guessed URLs below are appended
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names (unknown names rank last); webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if the user doesn't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Append one subtitle entry per supported format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Parsed only after the guard above so a missing URL is never parsed
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Pick up start/end times from the URL; the fragment is checked before the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: the pattern is compiled in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, 'toggleButtonRenderer',
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
                    default=[]))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows marks more than one song; in that case
        # the album/artist/track rows are not copied into the info dict
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 4205
a61fd4cf 4206
a6213a49 4207class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 4208
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extractor method so smuggled data is forwarded to its entries.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with the
    unsmuggled URL. When there is smuggled data and the returned info dict has
    entries, every entry URL is re-smuggled lazily with the same data.
    """
    def _resmuggle_lazily(entries, smuggled_data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], smuggled_data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not (smuggled_data and result.get('entries')):
            return result
        result['entries'] = _resmuggle_lazily(result['entries'], smuggled_data)
        return result

    return wrapper
4228
a6213a49 4229 def _extract_channel_id(self, webpage):
4230 channel_id = self._html_search_meta(
4231 'channelId', webpage, 'channel id', default=None)
4232 if channel_id:
4233 return channel_id
4234 channel_url = self._html_search_meta(
4235 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4236 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4237 'twitter:app:url:googleplay'), webpage, 'channel url')
4238 return self._search_regex(
4239 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4240 channel_url, 'channel id')
15f6397c 4241
8bdd16b4 4242 @staticmethod
cd7c66cf 4243 def _extract_basic_item_renderer(item):
4244 # Modified from _extract_grid_item_renderer
201c1459 4245 known_basic_renderers = (
a17526e4 4246 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4247 )
4248 for key, renderer in item.items():
201c1459 4249 if not isinstance(renderer, dict):
cd7c66cf 4250 continue
201c1459 4251 elif key in known_basic_renderers:
4252 return renderer
4253 elif key.startswith('grid') and key.endswith('Renderer'):
4254 return renderer
8bdd16b4 4255
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is resolved, in order of precedence, as a playlist, a video,
    a channel, or a generic navigation endpoint URL handled by one of the
    YouTube extractors. Items that fit none of these are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 4295
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url_result for a list item renderer.

    Tried in order: the item's own video id, the watch endpoint's playlist
    (with or without a starting video), and the browse endpoint. Returns
    None when none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            target_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4313
3d3dddc9 4314 def _shelf_entries_from_content(self, shelf_renderer):
4315 content = shelf_renderer.get('content')
4316 if not isinstance(content, dict):
8bdd16b4 4317 return
cd7c66cf 4318 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4319 if renderer:
4320 # TODO: add support for nested playlists so each shelf is processed
4321 # as separate playlist
4322 # TODO: this includes only first N items
86e5f3ed 4323 yield from self._grid_entries(renderer)
3d3dddc9 4324 renderer = content.get('horizontalListRenderer')
4325 if renderer:
4326 # TODO
4327 pass
8bdd16b4 4328
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url_result for the shelf's own URL, then its embedded entries.

    When `skip_channels` is true and the shelf URL links to a channels
    listing, nothing at all is yielded for this shelf.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    # Skipping links to other channels; note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 4344
8bdd16b4 4345 def _playlist_entries(self, video_list_renderer):
4346 for content in video_list_renderer['contents']:
4347 if not isinstance(content, dict):
4348 continue
4349 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4350 if not isinstance(renderer, dict):
4351 continue
4352 video_id = renderer.get('videoId')
4353 if not video_id:
4354 continue
4355 yield self._extract_video(renderer)
07aeced6 4356
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if its content has a video id.

    The video or reel renderer is looked up under the item's ``content``.
    """
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4364
8bdd16b4 4365 def _video_entry(self, video_renderer):
4366 video_id = video_renderer.get('videoId')
4367 if video_id:
4368 return self._extract_video(video_renderer)
dacb3a86 4369
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the hashtag tile's tap target, or None without a URL."""
    tap_target = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_target)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4376
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    linked inline in the post text (except a link to the attached video).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4412
8bdd16b4 4413 def _post_thread_continuation_entries(self, post_thread_continuation):
4414 contents = post_thread_continuation.get('contents')
4415 if not isinstance(contents, list):
4416 return
4417 for content in contents:
4418 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4419 if isinstance(renderer, dict):
4420 yield from self._post_thread_entries(renderer)
8bdd16b4 4421 continue
6b0b0a28 4422 renderer = content.get('videoRenderer')
4423 if isinstance(renderer, dict):
4424 yield self._video_entry(renderer)
07aeced6 4425
39ed931e 4426 r''' # unused
4427 def _rich_grid_entries(self, contents):
4428 for content in contents:
4429 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4430 if video_renderer:
4431 entry = self._video_entry(video_renderer)
4432 if entry:
4433 yield entry
4434 '''
52efa4b3 4435
def _report_history_entries(self, renderer):
    """Yield a video url_result for every link found in the report history table cells."""
    cell_link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, cell_link_path):
        absolute_url = urljoin('https://www.youtube.com', url)
        yield self.url_result(absolute_url, YoutubeIE)
4442
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    @param parent_renderer:   dict whose 'contents' list is walked
    @param continuation_list: single-item list used as an out-parameter;
                              it is reset to [None] and then filled in-place with
                              whatever ``_extract_continuation`` returns for the
                              renderer that produced entries (falling back to the
                              section renderer, then to ``parent_renderer``)
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries.
    # Nothing in it depends on the loop variables, so it is built once per call.
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Items that are not wrapped in a section/shelf renderer
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Only the first recognised renderer key of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4495
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The first batch comes from the tab's 'sectionListRenderer' or
    'richGridRenderer'; afterwards continuation responses are requested
    until no continuation is left, a request returns nothing, or a response
    contains no recognised renderer.

    @param tab:            tab renderer dict; its 'content' dict is read
    @param item_id:        used to label each page request
    @param ytcfg:          forwarded to header generation and the API request
    @param account_syncid: forwarded to header generation
    @param visitor_data:   forwarded to header generation; replaced by the
                           value found in each response when available
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up ``continuation_list`` at call time, so rebinding it below is picked up
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Renderer key -> (entry generator, key to wrap the continuation items in, or None).
    # Identical for every page, so it is built once outside the paging loop.
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The keys of the first item (or of the mapping itself) select the handler
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
9558dcec 4558
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the first tab whose 'selected' flag is True.

    The 'tabRenderer' / 'expandableTabRenderer' value of each tab is checked.
    When no tab is selected, ExtractorError is raised if ``fatal`` is true;
    otherwise None is returned.
    """
    renderers = (dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} for tab in tabs)
    selected = next((renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 4568
def _extract_uploader(self, data):
    """Return uploader, uploader_id and uploader_url read from the secondary playlist sidebar.

    Fields with no value are dropped by ``filter_dict``; an empty dict is
    returned when the sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)

    info = {}
    if owner:
        def browse_endpoint_value(key):
            return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], str)

        owner_text = owner.get('text')
        # For text of the form "by X and N others" keep only X; otherwise use the text as-is
        info['uploader'] = self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
        info['uploader_id'] = browse_endpoint_value('browseId')
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/', browse_endpoint_value('canonicalBaseUrl'))
    return filter_dict(info)
8bdd16b4 4584
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from the channel (or else playlist) metadata renderer,
    the primary playlist sidebar and the page header; entries are produced
    lazily by ``_entries`` for the selected tab.

    @param item_id: fallback playlist id when no channel id is available
    @param ytcfg:   forwarded to ``_entries`` and to syncid/visitor-data extraction
    @param data:    response
    @param tabs:    tab list passed to ``_extract_selected_tab``
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    # Channel metadata is preferred; playlist metadata is consulted only when it is absent
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The selected tab's title / expanded text are appended to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, ytcfg, account_syncid, visitor_data),
        **metadata)
73c4ac2c 4676
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, requesting further pages from the 'next' endpoint.

    Each page is converted with ``_playlist_entries``; videos up to and
    including the last one already yielded are skipped. Iteration stops when
    a page has no videos, has nothing new, or no playlist can be read from
    the response.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: id sent with every page request
    @param data:        response; used for account syncid / visitor data
    @param ytcfg:       forwarded to header generation and the API request
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item may lack a watch endpoint; fall back to an empty
        # dict so the defaults in the query below apply instead of crashing on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist is None:
            # Nothing usable in the response: stop rather than feed None to the next iteration
            return
cd7c66cf 4707
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for an inline playlist.

    When the playlist links to a page URL different from ``url`` (and the
    id is not a known-unviewable one), a url_result for YoutubeTabIE is
    returned; otherwise the playlist is extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_playlist_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, relative_playlist_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not can_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4730
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a public badge, header privacy or privacy dropdown says so;
             otherwise the result of ``_availability`` for the detected flags
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    player_header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = traverse_obj(
        renderer, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    def _privacy_flag(badge_type, header_value, icon_value):
        # A matching badge is decisive on its own. Previously, because a conditional
        # expression binds looser than ``or``, the badge was only consulted when the
        # header privacy was present.
        if self._has_badge(badges, badge_type):
            return True
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return None  # unknown

    is_public = (
        self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        or player_header_privacy == 'PUBLIC'
        or privacy_setting_icon == 'PRIVACY_PUBLIC')
    if is_public:
        return 'public'

    return self._availability(
        is_private=_privacy_flag(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=_privacy_flag(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 4768
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the playlist sidebar items, else None.

    @param data:          response
    @param info_renderer: key looked up in every sidebar item
    @param expected_type: type the value must have to be accepted
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4777
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the primary sidebar renderer is missing. When the
    button is not found, default 'params' / 'browseId' values are requested.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            # Only the first matching menu item is used
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = browse_endpoint.get('browseId'), browse_endpoint.get('params')
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4813
@functools.cached_property
def skip_webpage(self):
    """True when 'webpage' is among the values of the 'skip' extractor argument for YoutubeTabIE."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
4817
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying through ``RetryManager``.

    @returns (webpage, data); either may be None when nothing was obtained
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403/429 which are reported immediately
            is_403_or_429 = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_403_or_429:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4845
a25bca9f 4846 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4847 """Use if failed to extract ytcfg (and data) from initial webpage"""
4848 if not ytcfg and self.is_authenticated:
4849 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4850 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4851 raise ExtractorError(
4852 f'{msg}. If you are not downloading private content, or '
4853 'your cookies are only for the first account and channel,'
4854 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4855 expected=True)
4856 self.report_warning(msg, only_once=True)
4857
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The webpage is tried first unless ``skip_webpage`` is set; when it
    yields no data, the tab endpoint is queried instead.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4877
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through 'navigation/resolve_url' and fetch the endpoint it maps to.

    A 'browseEndpoint' is requested from 'browse' and a 'watchEndpoint'
    from 'next'. When neither is present, ExtractorError is raised if
    ``fatal`` is true; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4895
# Default 'params' value sent with search queries when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search result entries for ``query``, following continuations.

    @param query:          search string
    @param params:         value for the request's 'params' field; defaults to
                           ``_SEARCH_PARAMS`` and is omitted when falsy
    @param default_client: forwarded to ytcfg download, header generation and the API request
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # The continuation found on the previous page (if any) is merged into the request
        request_data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4930
4931
4932class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4933 IE_DESC = 'YouTube Tabs'
4934 _VALID_URL = r'''(?x:
4935 https?://
4936 (?:\w+\.)?
4937 (?:
4938 youtube(?:kids)?\.com|
4939 %(invidious)s
4940 )/
4941 (?:
4942 (?P<channel_type>channel|c|user|browse)/|
4943 (?P<not_channel>
4944 feed/|hashtag/|
4945 (?:playlist|watch)\?.*?\blist=
4946 )|
4947 (?!(?:%(reserved_names)s)\b) # Direct URLs
4948 )
4949 (?P<id>[^/?\#&]+)
4950 )''' % {
4951 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4952 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4953 }
4954 IE_NAME = 'youtube:tab'
4955
4956 _TESTS = [{
4957 'note': 'playlists, multipage',
4958 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4959 'playlist_mincount': 94,
4960 'info_dict': {
4961 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4962 'title': 'Igor Kleiner - Playlists',
a6213a49 4963 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4964 'uploader': 'Igor Kleiner',
a6213a49 4965 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4966 'channel': 'Igor Kleiner',
4967 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4968 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4969 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4970 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4971 'channel_follower_count': int
a6213a49 4972 },
4973 }, {
4974 'note': 'playlists, multipage, different order',
4975 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4976 'playlist_mincount': 94,
4977 'info_dict': {
4978 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4979 'title': 'Igor Kleiner - Playlists',
a6213a49 4980 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4981 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4982 'uploader': 'Igor Kleiner',
4983 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4984 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4985 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4986 'channel': 'Igor Kleiner',
4987 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4988 'channel_follower_count': int
a6213a49 4989 },
4990 }, {
4991 'note': 'playlists, series',
4992 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4993 'playlist_mincount': 5,
4994 'info_dict': {
4995 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4996 'title': '3Blue1Brown - Playlists',
4997 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4998 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4999 'uploader': '3Blue1Brown',
976ae3ea 5000 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5001 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5002 'channel': '3Blue1Brown',
5003 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5004 'tags': ['Mathematics'],
6c73052c 5005 'channel_follower_count': int
a6213a49 5006 },
5007 }, {
5008 'note': 'playlists, singlepage',
5009 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5010 'playlist_mincount': 4,
5011 'info_dict': {
5012 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5013 'title': 'ThirstForScience - Playlists',
5014 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5015 'uploader': 'ThirstForScience',
5016 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5017 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5018 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5019 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5020 'tags': 'count:13',
5021 'channel': 'ThirstForScience',
6c73052c 5022 'channel_follower_count': int
a6213a49 5023 }
5024 }, {
5025 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5026 'only_matching': True,
5027 }, {
5028 'note': 'basic, single video playlist',
5029 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5030 'info_dict': {
5031 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5032 'uploader': 'Sergey M.',
5033 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5034 'title': 'youtube-dl public playlist',
976ae3ea 5035 'description': '',
5036 'tags': [],
5037 'view_count': int,
5038 'modified_date': '20201130',
5039 'channel': 'Sergey M.',
5040 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5041 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5042 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5043 'availability': 'public',
a6213a49 5044 },
5045 'playlist_count': 1,
5046 }, {
5047 'note': 'empty playlist',
5048 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5049 'info_dict': {
5050 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5051 'uploader': 'Sergey M.',
5052 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5053 'title': 'youtube-dl empty playlist',
976ae3ea 5054 'tags': [],
5055 'channel': 'Sergey M.',
5056 'description': '',
5057 'modified_date': '20160902',
5058 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5059 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5060 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5061 'availability': 'public',
a6213a49 5062 },
5063 'playlist_count': 0,
5064 }, {
5065 'note': 'Home tab',
5066 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5067 'info_dict': {
5068 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5069 'title': 'lex will - Home',
5070 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5071 'uploader': 'lex will',
5072 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5073 'channel': 'lex will',
5074 'tags': ['bible', 'history', 'prophesy'],
5075 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5076 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5077 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5078 'channel_follower_count': int
a6213a49 5079 },
5080 'playlist_mincount': 2,
5081 }, {
5082 'note': 'Videos tab',
5083 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5084 'info_dict': {
5085 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5086 'title': 'lex will - Videos',
5087 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5088 'uploader': 'lex will',
5089 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5090 'tags': ['bible', 'history', 'prophesy'],
5091 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5092 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5093 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5094 'channel': 'lex will',
6c73052c 5095 'channel_follower_count': int
a6213a49 5096 },
5097 'playlist_mincount': 975,
5098 }, {
5099 'note': 'Videos tab, sorted by popular',
5100 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5101 'info_dict': {
5102 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5103 'title': 'lex will - Videos',
5104 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5105 'uploader': 'lex will',
5106 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5107 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5108 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5109 'channel': 'lex will',
5110 'tags': ['bible', 'history', 'prophesy'],
5111 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5112 'channel_follower_count': int
a6213a49 5113 },
5114 'playlist_mincount': 199,
5115 }, {
5116 'note': 'Playlists tab',
5117 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5118 'info_dict': {
5119 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5120 'title': 'lex will - Playlists',
5121 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5122 'uploader': 'lex will',
5123 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5124 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5125 'channel': 'lex will',
5126 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5127 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5128 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5129 'channel_follower_count': int
a6213a49 5130 },
5131 'playlist_mincount': 17,
5132 }, {
5133 'note': 'Community tab',
5134 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5135 'info_dict': {
5136 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5137 'title': 'lex will - Community',
5138 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5139 'uploader': 'lex will',
5140 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5141 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5142 'channel': 'lex will',
5143 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5144 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5145 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5146 'channel_follower_count': int
a6213a49 5147 },
5148 'playlist_mincount': 18,
5149 }, {
5150 'note': 'Channels tab',
5151 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5152 'info_dict': {
5153 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5154 'title': 'lex will - Channels',
5155 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5156 'uploader': 'lex will',
5157 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5158 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5159 'channel': 'lex will',
5160 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5161 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5162 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5163 'channel_follower_count': int
a6213a49 5164 },
5165 'playlist_mincount': 12,
5166 }, {
5167 'note': 'Search tab',
5168 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5169 'playlist_mincount': 40,
5170 'info_dict': {
5171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5172 'title': '3Blue1Brown - Search - linear algebra',
5173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5174 'uploader': '3Blue1Brown',
5175 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5176 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5177 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5178 'tags': ['Mathematics'],
5179 'channel': '3Blue1Brown',
5180 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 5181 'channel_follower_count': int
a6213a49 5182 },
5183 }, {
5184 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5185 'only_matching': True,
5186 }, {
5187 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5188 'only_matching': True,
5189 }, {
5190 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5191 'only_matching': True,
5192 }, {
5193 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5194 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5195 'info_dict': {
5196 'title': '29C3: Not my department',
5197 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5198 'uploader': 'Christiaan008',
5199 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5200 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5201 'tags': [],
5202 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5203 'view_count': int,
5204 'modified_date': '20150605',
5205 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5206 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5207 'channel': 'Christiaan008',
c26f9b99 5208 'availability': 'public',
a6213a49 5209 },
5210 'playlist_count': 96,
5211 }, {
5212 'note': 'Large playlist',
5213 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5214 'info_dict': {
5215 'title': 'Uploads from Cauchemar',
5216 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5217 'uploader': 'Cauchemar',
5218 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5219 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5220 'tags': [],
5221 'modified_date': r're:\d{8}',
5222 'channel': 'Cauchemar',
5223 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5224 'view_count': int,
5225 'description': '',
5226 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5227 'availability': 'public',
a6213a49 5228 },
5229 'playlist_mincount': 1123,
976ae3ea 5230 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5231 }, {
5232 'note': 'even larger playlist, 8832 videos',
5233 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5234 'only_matching': True,
5235 }, {
5236 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5237 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5238 'info_dict': {
5239 'title': 'Uploads from Interstellar Movie',
5240 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5241 'uploader': 'Interstellar Movie',
5242 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5243 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5244 'tags': [],
5245 'view_count': int,
5246 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5247 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5248 'channel': 'Interstellar Movie',
5249 'description': '',
5250 'modified_date': r're:\d{8}',
c26f9b99 5251 'availability': 'public',
a6213a49 5252 },
5253 'playlist_mincount': 21,
5254 }, {
5255 'note': 'Playlist with "show unavailable videos" button',
5256 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5257 'info_dict': {
5258 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5259 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5260 'uploader': 'Phim Siêu Nhân Nhật Bản',
5261 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5262 'view_count': int,
5263 'channel': 'Phim Siêu Nhân Nhật Bản',
5264 'tags': [],
5265 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5266 'description': '',
5267 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5268 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5269 'modified_date': r're:\d{8}',
c26f9b99 5270 'availability': 'public',
a6213a49 5271 },
5272 'playlist_mincount': 200,
976ae3ea 5273 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5274 }, {
5275 'note': 'Playlist with unavailable videos in page 7',
5276 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5277 'info_dict': {
5278 'title': 'Uploads from BlankTV',
5279 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5280 'uploader': 'BlankTV',
5281 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5282 'channel': 'BlankTV',
5283 'channel_url': 'https://www.youtube.com/c/blanktv',
5284 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5285 'view_count': int,
5286 'tags': [],
5287 'uploader_url': 'https://www.youtube.com/c/blanktv',
5288 'modified_date': r're:\d{8}',
5289 'description': '',
c26f9b99 5290 'availability': 'public',
a6213a49 5291 },
5292 'playlist_mincount': 1000,
976ae3ea 5293 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5294 }, {
5295 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5296 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5297 'info_dict': {
5298 'title': 'Data Analysis with Dr Mike Pound',
5299 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5300 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5301 'uploader': 'Computerphile',
5302 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5303 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5304 'tags': [],
5305 'view_count': int,
5306 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5307 'channel_url': 'https://www.youtube.com/user/Computerphile',
5308 'channel': 'Computerphile',
c26f9b99 5309 'availability': 'public',
a6213a49 5310 },
5311 'playlist_mincount': 11,
5312 }, {
5313 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5314 'only_matching': True,
5315 }, {
5316 'note': 'Playlist URL that does not actually serve a playlist',
5317 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5318 'info_dict': {
5319 'id': 'FqZTN594JQw',
5320 'ext': 'webm',
5321 'title': "Smiley's People 01 detective, Adventure Series, Action",
5322 'uploader': 'STREEM',
5323 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5325 'upload_date': '20150526',
5326 'license': 'Standard YouTube License',
5327 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5328 'categories': ['People & Blogs'],
5329 'tags': list,
5330 'view_count': int,
5331 'like_count': int,
a6213a49 5332 },
5333 'params': {
5334 'skip_download': True,
5335 },
5336 'skip': 'This video is not available.',
5337 'add_ie': [YoutubeIE.ie_key()],
5338 }, {
5339 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5340 'only_matching': True,
5341 }, {
5342 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5343 'only_matching': True,
5344 }, {
5345 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5346 'info_dict': {
12a1b225 5347 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5348 'ext': 'mp4',
976ae3ea 5349 'title': str,
a6213a49 5350 'uploader': 'Sky News',
5351 'uploader_id': 'skynews',
5352 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5353 'upload_date': r're:\d{8}',
976ae3ea 5354 'description': str,
a6213a49 5355 'categories': ['News & Politics'],
5356 'tags': list,
5357 'like_count': int,
6c73052c 5358 'release_timestamp': 1642502819,
976ae3ea 5359 'channel': 'Sky News',
5360 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5361 'age_limit': 0,
5362 'view_count': int,
6c73052c 5363 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5364 'playable_in_embed': True,
6c73052c 5365 'release_date': '20220118',
976ae3ea 5366 'availability': 'public',
5367 'live_status': 'is_live',
5368 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5369 'channel_follower_count': int
a6213a49 5370 },
5371 'params': {
5372 'skip_download': True,
5373 },
976ae3ea 5374 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5375 }, {
5376 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5377 'info_dict': {
5378 'id': 'a48o2S1cPoo',
5379 'ext': 'mp4',
5380 'title': 'The Young Turks - Live Main Show',
5381 'uploader': 'The Young Turks',
5382 'uploader_id': 'TheYoungTurks',
5383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5384 'upload_date': '20150715',
5385 'license': 'Standard YouTube License',
5386 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5387 'categories': ['News & Politics'],
5388 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5389 'like_count': int,
a6213a49 5390 },
5391 'params': {
5392 'skip_download': True,
5393 },
5394 'only_matching': True,
5395 }, {
5396 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5397 'only_matching': True,
5398 }, {
5399 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5400 'only_matching': True,
5401 }, {
5402 'note': 'A channel that is not live. Should raise error',
5403 'url': 'https://www.youtube.com/user/numberphile/live',
5404 'only_matching': True,
5405 }, {
5406 'url': 'https://www.youtube.com/feed/trending',
5407 'only_matching': True,
5408 }, {
5409 'url': 'https://www.youtube.com/feed/library',
5410 'only_matching': True,
5411 }, {
5412 'url': 'https://www.youtube.com/feed/history',
5413 'only_matching': True,
5414 }, {
5415 'url': 'https://www.youtube.com/feed/subscriptions',
5416 'only_matching': True,
5417 }, {
5418 'url': 'https://www.youtube.com/feed/watch_later',
5419 'only_matching': True,
5420 }, {
5421 'note': 'Recommended - redirects to home page.',
5422 'url': 'https://www.youtube.com/feed/recommended',
5423 'only_matching': True,
5424 }, {
5425 'note': 'inline playlist with not always working continuations',
5426 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5427 'only_matching': True,
5428 }, {
5429 'url': 'https://www.youtube.com/course',
5430 'only_matching': True,
5431 }, {
5432 'url': 'https://www.youtube.com/zsecurity',
5433 'only_matching': True,
5434 }, {
5435 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5436 'only_matching': True,
5437 }, {
5438 'url': 'https://www.youtube.com/TheYoungTurks/live',
5439 'only_matching': True,
5440 }, {
5441 'url': 'https://www.youtube.com/hashtag/cctv9',
5442 'info_dict': {
5443 'id': 'cctv9',
5444 'title': '#cctv9',
976ae3ea 5445 'tags': [],
a6213a49 5446 },
5447 'playlist_mincount': 350,
5448 }, {
5449 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5450 'only_matching': True,
5451 }, {
5452 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5453 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5454 'only_matching': True
5455 }, {
5456 'note': '/browse/ should redirect to /channel/',
5457 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5458 'only_matching': True
5459 }, {
5460 'note': 'VLPL, should redirect to playlist?list=PL...',
5461 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5462 'info_dict': {
5463 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5464 'uploader': 'NoCopyrightSounds',
5465 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5466 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5467 'title': 'NCS : All Releases 💿',
976ae3ea 5468 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5469 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5470 'modified_date': r're:\d{8}',
5471 'view_count': int,
5472 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5473 'tags': [],
5474 'channel': 'NoCopyrightSounds',
c26f9b99 5475 'availability': 'public',
a6213a49 5476 },
5477 'playlist_mincount': 166,
976ae3ea 5478 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5479 }, {
5480 'note': 'Topic, should redirect to playlist?list=UU...',
5481 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5482 'info_dict': {
5483 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5484 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5485 'title': 'Uploads from Royalty Free Music - Topic',
5486 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5487 'tags': [],
5488 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5489 'channel': 'Royalty Free Music - Topic',
5490 'view_count': int,
5491 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5492 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5493 'modified_date': r're:\d{8}',
5494 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5495 'description': '',
c26f9b99 5496 'availability': 'public',
a6213a49 5497 },
5498 'expected_warnings': [
a6213a49 5499 'The URL does not have a videos tab',
976ae3ea 5500 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5501 ],
5502 'playlist_mincount': 101,
5503 }, {
5504 'note': 'Topic without a UU playlist',
5505 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5506 'info_dict': {
5507 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5508 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5509 'tags': [],
a6213a49 5510 },
5511 'expected_warnings': [
976ae3ea 5512 'the playlist redirect gave error',
a6213a49 5513 ],
5514 'playlist_mincount': 9,
5515 }, {
5516 'note': 'Youtube music Album',
5517 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5518 'info_dict': {
5519 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5520 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5521 'tags': [],
5522 'view_count': int,
5523 'description': '',
5524 'availability': 'unlisted',
5525 'modified_date': r're:\d{8}',
a6213a49 5526 },
5527 'playlist_count': 50,
5528 }, {
5529 'note': 'unlisted single video playlist',
5530 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5531 'info_dict': {
5532 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5533 'uploader': 'colethedj',
5534 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5535 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5536 'availability': 'unlisted',
5537 'tags': [],
12a1b225 5538 'modified_date': '20220418',
976ae3ea 5539 'channel': 'colethedj',
5540 'view_count': int,
5541 'description': '',
5542 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5543 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5544 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5545 },
5546 'playlist_count': 1,
5547 }, {
5548 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5549 'url': 'https://www.youtube.com/feed/recommended',
5550 'info_dict': {
5551 'id': 'recommended',
5552 'title': 'recommended',
6c73052c 5553 'tags': [],
a6213a49 5554 },
5555 'playlist_mincount': 50,
5556 'params': {
5557 'skip_download': True,
5558 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5559 },
5560 }, {
5561 'note': 'API Fallback: /videos tab, sorted by oldest first',
5562 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5563 'info_dict': {
5564 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5565 'title': 'Cody\'sLab - Videos',
5566 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5567 'uploader': 'Cody\'sLab',
5568 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5569 'channel': 'Cody\'sLab',
5570 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5571 'tags': [],
5572 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5573 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5574 'channel_follower_count': int
a6213a49 5575 },
5576 'playlist_mincount': 650,
5577 'params': {
5578 'skip_download': True,
5579 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5580 },
5581 }, {
5582 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5583 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5584 'info_dict': {
5585 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5586 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5587 'title': 'Uploads from Royalty Free Music - Topic',
5588 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5589 'modified_date': r're:\d{8}',
5590 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5591 'description': '',
5592 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5593 'tags': [],
5594 'channel': 'Royalty Free Music - Topic',
5595 'view_count': int,
5596 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
c26f9b99 5597 'availability': 'public',
a6213a49 5598 },
5599 'expected_warnings': [
976ae3ea 5600 'does not have a videos tab',
5601 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5602 ],
5603 'playlist_mincount': 101,
5604 'params': {
5605 'skip_download': True,
5606 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5607 },
7c219ea6 5608 }, {
5609 'note': 'non-standard redirect to regional channel',
5610 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5611 'only_matching': True
61d3665d 5612 }, {
5613 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5614 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5615 'info_dict': {
5616 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5617 'modified_date': '20220407',
5618 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5619 'tags': [],
5620 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5621 'uploader': 'pukkandan',
5622 'availability': 'unlisted',
5623 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5624 'channel': 'pukkandan',
5625 'description': 'Test for collaborative playlist',
5626 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5627 'view_count': int,
61d3665d 5628 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5629 },
5630 'playlist_mincount': 2
c26f9b99 5631 }, {
5632 'note': 'translated tab name',
5633 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5634 'info_dict': {
5635 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5636 'tags': [],
5637 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5638 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5639 'description': '',
5640 'title': 'cole-dlp-test-acc - 再生リスト',
5641 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5642 'uploader': 'cole-dlp-test-acc',
5643 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5644 'channel': 'cole-dlp-test-acc',
5645 },
5646 'playlist_mincount': 1,
5647 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5648 'expected_warnings': ['Preferring "ja"'],
5649 }, {
5650 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5651 'note': 'preferred lang set with playlist with translated video titles',
5652 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5653 'info_dict': {
5654 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5655 'tags': [],
5656 'view_count': int,
5657 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5658 'uploader': 'cole-dlp-test-acc',
5659 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5660 'channel': 'cole-dlp-test-acc',
5661 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5662 'description': 'test',
5663 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5664 'title': 'dlp test playlist',
5665 'availability': 'public',
5666 },
5667 'playlist_mincount': 1,
5668 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5669 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 5670 }, {
5671 # shorts audio pivot for 2GtVksBMYFM.
5672 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5673 'info_dict': {
5674 'id': 'sfv_audio_pivot',
5675 'title': 'sfv_audio_pivot',
5676 'tags': [],
5677 },
5678 'playlist_mincount': 50,
5679
a6213a49 5680 }]
5681
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always declined here; every other URL
    is judged by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5685
# Splits a tab URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional "/<word>" path segment; the conditional group only
#          attempts it when the `not_channel` group (which has to come from
#          _VALID_URL, not visible here) did not take part in the match
#   post - everything that is left, up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
fe03a6cd 5687
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist style URL and hand off to the matching extraction path.

    Steps, in order:
      1. Rewrite the URL host to www.youtube.com and split it with _URL_RE.
      2. For channel URLs flagged as music URLs (smuggled 'is_music_url'), map
         VL/MP/browse ids to their playlist or channel equivalent.
      3. Point tab-less channel URLs at "/videos", unless the
         'no-youtube-channel-redirect' compat option is set.
      4. Handle URLs carrying 'v' and/or 'list' query parameters.
      5. Fetch the page data with self._extract_data, follow a regional
         redirect if the data contains one, and check that the selected tab
         is the requested one.
      6. Return the result built from the tabs, from the watch-page playlist,
         or a url_result for a single video.

    @param url            URL being extracted
    @param smuggled_data  mapping read with .get('is_music_url'); presumably
                          supplied by the passthrough_smuggled_data decorator
                          (defined outside this section)

    Raises ExtractorError when an album cannot be resolved to a playlist, when
    a watch URL has neither a video nor a playlist id, when an explicitly
    requested tab is not the selected one, or when nothing recognizable is
    found in the data. Raises UserNotLive when a "/live" URL yields a tabs page.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com regardless of the domain the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # groupdict() of _URL_RE with every unmatched (None) group replaced by '',
        # so that the values can be used with str methods without None checks
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Re-dispatch the canonical URL to this same extractor
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember the tab as given in the URL; it is used further down to tell an
    # explicitly requested tab from the automatically added "/videos"
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-split it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the tab and trailing part of the current URL on the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        # The and/or chain falls through to the next candidate whenever one is falsy (e.g. '').
        selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
                             or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:]  # primary
                             or translated_tab_name)

        # "home" is treated as the "featured" tab
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]

        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added automatically above, so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            # Keep the channel data fetched earlier and only extend the message
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The tab was explicitly part of the given URL: report it as an expected error
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        # Keep the current data when the reload returns nothing
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # Re-read the tabs, since `data` may have been replaced since the first lookup
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    # No tabs: look for a playlist embedded in watch-page style data
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, taken from the data or else from the URL's "v" parameter
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5826
c5e8d7af 5827
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and ``list=`` URLs and hand them over to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Verbose regex: an optional "<host>/...?...list=" prefix followed by the playlist ID
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Decline URLs that YoutubeTabIE accepts or that carry a non-empty ``v``
        query parameter; anything else goes through the inherited check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is deliberately imported inside the method here;
        # presumably so this method stays self-contained if its source is reused
        # outside this module - confirm before hoisting to the module imports
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a canonical ``/playlist`` URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the caller's URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist ID has no query string of its own
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5939
5940
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5990
5991
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds onto the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6005
6006
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's ``/videos`` page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with everything after the ``ytuser:`` prefix as user ID."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 6021
b05654f0 6022
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor (``:ytfav`` and spelling variants) for the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6040
6041
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Keyword extractor (``:ytnotif``) that pages through the notification menu API."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield one entry per usable notification found in ``response``.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last truthy
        ``continuationItemRenderer`` seen among the items, if any.
        """
        # The items sit under one of two paths (popup menu or appended continuation items)
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, popup_items_path, appended_items_path, expected_type=list)

        continuation_list[0] = None
        for item in items or []:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Convert one ``notificationRenderer`` into a ``url`` result dict.

        Notifications with a watch endpoint point at the video; otherwise the
        browse endpoint must provide both a channel ID and a ``/post/`` ID, and
        the entry points at the community post. Returns None when neither works.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only derived from the relative "sent" text when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page provides no continuation."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is read from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with ID and title ``notifications``."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
6131
6132
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 6146
a61fd4cf 6147
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    # Name is derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 6161
c9ae7b95 6162
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs carrying a ``search_query`` or ``q`` parameter."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL; the query text is used as playlist ID and title."""
        url_params = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        # ``sp`` is passed through to the search as-is; None when absent
        sp = url_params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, sp), query, query)
3462ffa8 6202
6203
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally narrowed to a section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case) -> value of the ``sp`` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Run the music search named in the URL.

        The section comes from ``sp`` when given, else from the URL fragment.
        The playlist ID and title are ``<query>`` or ``<query> - <section>``.
        """
        url_params = parse_qs(url)
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        params = url_params.get('sp', (None,))[0]

        if params:
            # Reverse lookup; an unknown ``sp`` value is used as the section label itself
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # Text between the first and the second '#' (empty without a fragment)
            fragment = url.partition('#')[2].partition('#')[0]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 6255
6256
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common implementation of the ``/feed/<name>`` extractors.
    Subclasses override _FEED_NAME; IE_NAME and the target URL are built from it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor, so the
        # login check is invoked through that class explicitly
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed page URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6275
6276
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor (``:ytwatchlater``) for the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6289
6290
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare site root URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 6306
1ed5b5c9 6307
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions`` (``:ytsubs`` and variants)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 6319
1ed5b5c9 6320
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history`` (``:ythis`` / ``:ythistory``)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
6329
6330
class YoutubeStoriesIE(InfoExtractor):
    """Expand ``ytstories:UC<id>`` into the matching ``RLTD<id>`` playlist URL."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured ID excludes the leading "UC" of the channel ID
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling an ``is_story`` flag along with the URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6345
6346
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs onto the ``sfv_audio_pivot`` feed."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the ``bp`` browse parameter for this video id: a fixed binary
        template with the id inserted three times, base64-encoded and URL-quoted
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated feed URL."""
        video_id = self._match_id(url)
        bp = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={bp}',
            ie=YoutubeTabIE)
6369
6370
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that end before any video ID appears and
    report a helpful error instead of attempting extraction.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a watch URL with no query or exactly one of the listed parameters,
    # or an attribution_link URL with only its ``a`` parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands must name this program (yt-dlp) so the advice can be used verbatim
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6418
6419
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve ``/clip/<id>`` URLs to the underlying video plus the clip's time range."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """
        Return a ``url_transparent`` result pointing at the clipped video.

        ``section_start`` / ``section_end`` are the clip bounds in seconds,
        converted from the millisecond values in the page data.

        Raises ExtractorError if the page data lacks the video ID or usable
        clip start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path above is absent from the page data;
        # fail with a clear extractor error rather than an opaque TypeError/KeyError
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
3cd786db 6476
6477
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)