]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Make early reject of `--match-filter` stricter
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
c26f9b99 5import enum
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
720c3099 9import math
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
46383212 13import sys
f8271158 14import threading
8a784c74 15import time
e0df6211 16import traceback
14f25df2 17import urllib.error
ac668111 18import urllib.parse
c5e8d7af 19
b05654f0 20from .common import InfoExtractor, SearchInfoExtractor
25836db6 21from .openload import PhantomJSwrapper
14f25df2 22from ..compat import functools
545cc85d 23from ..jsinterp import JSInterpreter
4bb4a188 24from ..utils import (
f8271158 25 NO_DEFAULT,
26 ExtractorError,
4d37720a 27 LazyList,
693f0600 28 UserNotLive,
720c3099 29 bug_reports_message,
82d02080 30 classproperty,
c5e8d7af 31 clean_html,
d92f5d5a 32 datetime_from_str,
11f9be09 33 dict_get,
7a32c70d 34 filter_dict,
2d30521a 35 float_or_none,
11f9be09 36 format_field,
ff91cf74 37 get_first,
dd27fd17 38 int_or_none,
641ad5d8 39 is_html,
34921b43 40 join_nonempty,
48416bc4 41 js_to_json,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
4dfbf869 49 parse_qs,
dca3ff4a 50 qualities,
3995d37d 51 remove_start,
cf7e015f 52 smuggle_url,
dbdaaa23 53 str_or_none,
c93d53f5 54 str_to_int,
f3aa3c3f 55 strftime_or_none,
7c365c21 56 traverse_obj,
556dbe7f 57 try_get,
c5e8d7af
PH
58 unescapeHTML,
59 unified_strdate,
f0d785d3 60 unified_timestamp,
cf7e015f 61 unsmuggle_url,
8bdd16b4 62 update_url_query,
21c340b8 63 url_or_none,
fe93e2c4 64 urljoin,
7c365c21 65 variadic,
c5e8d7af
PH
66)
67
962ffcf8 68# any clients starting with _ cannot be explicitly requested by the user
000c15a4 69INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
a0c830f4 75 'clientVersion': '2.20220801.00.00',
000c15a4 76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
a0c830f4 85 'clientVersion': '1.20220731.00.00',
000c15a4 86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
a0c830f4 96 'clientVersion': '1.20220727.01.00',
000c15a4 97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
e7e94f2a 101 'web_creator': {
18c7683d 102 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
e7e94f2a
D
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
a0c830f4 106 'clientVersion': '1.20220726.00.00',
e7e94f2a
D
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
000c15a4 111 'android': {
18c7683d 112 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
000c15a4 113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
50ac0e54 116 'clientVersion': '17.31.35',
117 'androidSdkVersion': 30,
118 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 119 }
120 },
121 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 122 'REQUIRE_JS_PLAYER': False
000c15a4 123 },
124 'android_embedded': {
18c7683d 125 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
000c15a4 126 'INNERTUBE_CONTEXT': {
127 'client': {
128 'clientName': 'ANDROID_EMBEDDED_PLAYER',
50ac0e54 129 'clientVersion': '17.31.35',
130 'androidSdkVersion': 30,
131 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
000c15a4 132 },
133 },
b6de707d 134 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
135 'REQUIRE_JS_PLAYER': False
000c15a4 136 },
137 'android_music': {
18c7683d 138 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
000c15a4 139 'INNERTUBE_CONTEXT': {
140 'client': {
141 'clientName': 'ANDROID_MUSIC',
a0c830f4 142 'clientVersion': '5.16.51',
50ac0e54 143 'androidSdkVersion': 30,
144 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
000c15a4 145 }
146 },
147 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 148 'REQUIRE_JS_PLAYER': False
000c15a4 149 },
e7e94f2a 150 'android_creator': {
18c7683d 151 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
e7e94f2a
D
152 'INNERTUBE_CONTEXT': {
153 'client': {
154 'clientName': 'ANDROID_CREATOR',
50ac0e54 155 'clientVersion': '22.30.100',
156 'androidSdkVersion': 30,
157 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
e7e94f2a
D
158 },
159 },
b6de707d 160 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
161 'REQUIRE_JS_PLAYER': False
e7e94f2a 162 },
18c7683d 163 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
164 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
000c15a4 165 'ios': {
18c7683d 166 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
000c15a4 167 'INNERTUBE_CONTEXT': {
168 'client': {
169 'clientName': 'IOS',
224b5a35 170 'clientVersion': '17.33.2',
18c7683d 171 'deviceModel': 'iPhone14,3',
224b5a35 172 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 173 }
174 },
b6de707d 175 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
176 'REQUIRE_JS_PLAYER': False
000c15a4 177 },
178 'ios_embedded': {
000c15a4 179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
224b5a35 182 'clientVersion': '17.33.2',
18c7683d 183 'deviceModel': 'iPhone14,3',
224b5a35 184 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 185 },
186 },
b6de707d 187 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
188 'REQUIRE_JS_PLAYER': False
000c15a4 189 },
190 'ios_music': {
18c7683d 191 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
000c15a4 192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_MUSIC',
224b5a35
SF
195 'clientVersion': '5.21',
196 'deviceModel': 'iPhone14,3',
197 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
000c15a4 198 },
199 },
b6de707d 200 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
201 'REQUIRE_JS_PLAYER': False
000c15a4 202 },
e7e94f2a
D
203 'ios_creator': {
204 'INNERTUBE_CONTEXT': {
205 'client': {
206 'clientName': 'IOS_CREATOR',
224b5a35
SF
207 'clientVersion': '22.33.101',
208 'deviceModel': 'iPhone14,3',
209 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
e7e94f2a
D
210 },
211 },
b6de707d 212 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
213 'REQUIRE_JS_PLAYER': False
e7e94f2a 214 },
3619f78d 215 # mweb has 'ultralow' formats
216 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 217 'mweb': {
18c7683d 218 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
000c15a4 219 'INNERTUBE_CONTEXT': {
220 'client': {
221 'clientName': 'MWEB',
a0c830f4 222 'clientVersion': '2.20220801.00.00',
000c15a4 223 }
224 },
225 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
e7870111
D
226 },
227 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
228 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
229 'tv_embedded': {
230 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
231 'INNERTUBE_CONTEXT': {
232 'client': {
233 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
234 'clientVersion': '2.0',
235 },
236 },
237 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
238 },
000c15a4 239}
240
241
e7870111
D
242def _split_innertube_client(client_name):
243 variant, *base = client_name.rsplit('.', 1)
244 if base:
245 return variant, base[0], variant
246 base, *variant = client_name.split('_', 1)
247 return client_name, base, variant[0] if variant else None
248
249
def build_innertube_clients():
    """
    Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS (in place).

    Each client gets a default API key, host, REQUIRE_JS_PLAYER flag and 'hl', plus
    a 'priority' derived from its base client. For every client without a variant
    an additional "<client>_embedscreen" entry is registered.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallback_settings = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot since new clients are added to the dict in the loop
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallback_settings:
            ytcfg.setdefault(key, value)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            embedscreen = copy.deepcopy(ytcfg)
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
276
277
278build_innertube_clients()
279
280
class BadgeType(enum.Enum):
    """Kinds of badge that can be attached to a renderer."""

    # Availability of the item
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live status
    LIVE_NOW = enum.auto()
288
289
de7f3446 290class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 291 """Provide base functions for Youtube extractors"""
e00eb564 292
3462ffa8 293 _RESERVED_NAMES = (
3cd786db 294 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 295 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 296 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 297 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 298
3619f78d 299 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
300
52efa4b3 301 # _NETRC_MACHINE = 'youtube'
3619f78d 302
b2e8bc1b
JMF
303 # If True it will raise an error if no login info is provided
304 _LOGIN_REQUIRED = False
305
d9190e44
RH
306 _INVIDIOUS_SITES = (
307 # invidious-redirect websites
308 r'(?:www\.)?redirect\.invidious\.io',
309 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 310 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
311 r'(?:www\.)?invidious\.pussthecat\.org',
312 r'(?:www\.)?invidious\.zee\.li',
313 r'(?:www\.)?invidious\.ethibox\.fr',
314 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
315 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
316 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
317 # youtube-dl invidious instances list
318 r'(?:(?:www|no)\.)?invidiou\.sh',
319 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
320 r'(?:www\.)?invidious\.kabi\.tk',
321 r'(?:www\.)?invidious\.mastodon\.host',
322 r'(?:www\.)?invidious\.zapashcanon\.fr',
323 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
324 r'(?:www\.)?invidious\.tinfoil-hat\.net',
325 r'(?:www\.)?invidious\.himiko\.cloud',
326 r'(?:www\.)?invidious\.reallyancient\.tech',
327 r'(?:www\.)?invidious\.tube',
328 r'(?:www\.)?invidiou\.site',
329 r'(?:www\.)?invidious\.site',
330 r'(?:www\.)?invidious\.xyz',
331 r'(?:www\.)?invidious\.nixnet\.xyz',
332 r'(?:www\.)?invidious\.048596\.xyz',
333 r'(?:www\.)?invidious\.drycat\.fr',
334 r'(?:www\.)?inv\.skyn3t\.in',
335 r'(?:www\.)?tube\.poal\.co',
336 r'(?:www\.)?tube\.connect\.cafe',
337 r'(?:www\.)?vid\.wxzm\.sx',
338 r'(?:www\.)?vid\.mint\.lgbt',
339 r'(?:www\.)?vid\.puffyan\.us',
340 r'(?:www\.)?yewtu\.be',
341 r'(?:www\.)?yt\.elukerio\.org',
342 r'(?:www\.)?yt\.lelux\.fi',
343 r'(?:www\.)?invidious\.ggc-project\.de',
344 r'(?:www\.)?yt\.maisputain\.ovh',
345 r'(?:www\.)?ytprivate\.com',
346 r'(?:www\.)?invidious\.13ad\.de',
347 r'(?:www\.)?invidious\.toot\.koeln',
348 r'(?:www\.)?invidious\.fdn\.fr',
349 r'(?:www\.)?watch\.nettohikari\.com',
350 r'(?:www\.)?invidious\.namazso\.eu',
351 r'(?:www\.)?invidious\.silkky\.cloud',
352 r'(?:www\.)?invidious\.exonip\.de',
353 r'(?:www\.)?invidious\.riverside\.rocks',
354 r'(?:www\.)?invidious\.blamefran\.net',
355 r'(?:www\.)?invidious\.moomoo\.de',
356 r'(?:www\.)?ytb\.trom\.tf',
357 r'(?:www\.)?yt\.cyberhost\.uk',
358 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
359 r'(?:www\.)?qklhadlycap4cnod\.onion',
360 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
361 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
362 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
363 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
364 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
365 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
366 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
367 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
368 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
369 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
370 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
371 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 372 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 373 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 374 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
375 r'(?:www\.)?watch\.whatever\.social',
376 r'(?:www\.)?piped\.garudalinux\.org',
377 r'(?:www\.)?piped\.rivo\.lol',
378 r'(?:www\.)?piped-libre\.kavin\.rocks',
379 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 380 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
381 r'(?:www\.)?il\.ax',
382 r'(?:www\.)?piped\.esmailelbob\.xyz',
383 r'(?:www\.)?piped\.projectsegfau\.lt',
384 r'(?:www\.)?piped\.privacydev\.net',
385 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
386 r'(?:www\.)?piped\.smnz\.de',
387 r'(?:www\.)?piped\.adminforge\.de',
388 r'(?:www\.)?watch\.whatevertinfoil\.de',
389 r'(?:www\.)?piped\.qdi\.fi',
d9190e44
RH
390 )
391
c26f9b99 392 # extracted from account/account_menu ep
393 # XXX: These are the supported YouTube UI and API languages,
394 # which is slightly different from languages supported for translation in YouTube studio
395 _SUPPORTED_LANG_CODES = [
396 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
397 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
398 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
399 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
400 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
401 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
402 ]
403
a057779d 404 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
405
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.
    Raises ExtractorError if the configured code is not in _SUPPORTED_LANG_CODES.
    """
    configured = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    preferred_lang = configured[0]
    if not preferred_lang:
        return None

    if preferred_lang in self._SUPPORTED_LANG_CODES:
        if preferred_lang != 'en':
            self.report_warning(
                f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
        return preferred_lang

    raise ExtractorError(
        f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
        expected=True)
423
def _initialize_consent(self):
    """
    Set an accepted CONSENT cookie for .youtube.com unless one is not needed.

    Nothing is done when a __Secure-3PSID cookie exists or when the current
    CONSENT cookie already contains 'YES'. A pending consent id is reused if
    present, otherwise a random three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent = jar.get('CONSENT')
    if consent:
        if 'YES' in consent.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 438
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that it carries the preferred language and UTC.

    Values of an existing PREF cookie are kept when it can be parsed; 'hl' and
    'tz' are always overridden.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if existing:
        try:
            pref = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 450
def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then enforce the login requirement."""
    # Order matters: cookies are set up before the login check may raise
    self._initialize_pref()
    self._initialize_consent()

    self._check_login_required()
455
456 def _check_login_required(self):
24146491 457 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 458 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 459
b7c47b74 460 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
461 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 462
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)
109dd3b2 465
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 468
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default ytcfg of `default_client`
    when the first lookup gives a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
473
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default client config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
109dd3b2 478
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default client config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
109dd3b2 483
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname: the 'innertube_host' extractor argument wins, then
    `req_api_hostname`, then the host of `default_client` ('web' if not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
487
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the default client config."""
    key_getter = lambda x: x['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)
109dd3b2 490
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict from `ytcfg` (or from the default client
    config), with language and timezone of its 'client' entry overridden.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(
        hl=self._preferred_lang or 'en', timeZone='UTC', utcOffsetMinutes=0)
    return context
498
cf87314d 499 _SAPISID = None
500
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the "SAPISIDHASH ..." authorization value for `origin`.

    The SAPISID value is looked up in the cookies once and remembered on the
    instance in _SAPISID (False when no usable cookie was found).
    Returns None when no SAPISID is available.
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(digest_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
525
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the client context) to the /youtubei/v1/<ep>
    endpoint and return the result of _download_json.

    The 'innertube_key' extractor argument takes precedence over `api_key`,
    which takes precedence over the key of `default_client`.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    payload = json.dumps(data).encode('utf8')
    return self._download_json(
        url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=payload, headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 543
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData and return it parsed."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 546
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX among `data` that converts to an int, else None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
557
558 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the ID_TOKEN from `ytcfg` if it has one, otherwise search for it in
    `webpage`. Returns None when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
568
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 587
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_paths, expected_type=str)
ac56cf38 597
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH header can be generated (computed once per instance)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
601
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the ytcfg.set(...) call found in `webpage`.
    Returns {} when there is no webpage, no match, or the JSON fails to parse.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
609
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicitly passed values win; anything missing is looked up in `ytcfg` (or
    the config of `default_client`). The result is passed through filter_dict
    before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    headers = {}
    headers['X-YouTube-Client-Name'] = str(
        self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client))
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)
    headers['User-Agent'] = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 635
def _download_ytcfg(self, client, video_id):
    """
    Download the page belonging to `client` and extract its ytcfg.
    Returns {} for clients without a known page or when nothing is extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}

    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
647
2d6659b9 648 @staticmethod
649 def _build_api_continuation_query(continuation, ctp=None):
650 query = {
651 'continuation': continuation
652 }
653 # TODO: Inconsistency with clickTrackingParams.
654 # Currently we have a fixed ctp contained within context (from ytcfg)
655 # and a ctp in root query for continuation.
656 if ctp:
657 query['clickTracking'] = {'clickTrackingParams': ctp}
658 return query
659
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of `renderer`. Returns None if there is no token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = (continuation_data or {}).get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 672
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when the endpoint has no command token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 682
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`: the "next continuation" data if
    present, else the first continuationItemRenderer endpoint that yields one.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False,
        expected_type=cls._extract_continuation_ep_data)
2d6659b9 693
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) pairs for every alert in data['alerts'] that
    has both a type and a text.
    """
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_entries:
        if not isinstance(entry, dict):
            continue
        for alert in entry.values():
            alert_type = alert.get('type')
            if alert_type:
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message
706
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (type, message) alerts as warnings and raise for the last error.

    Alerts of type "error" are collected as errors only when `fatal` is set;
    other alerts become warnings unless their message is in _IGNORED_WARNINGS.
    All but the last error are reported as warnings, the last one is raised
    as ExtractorError.
    """
    collected_errors = []
    collected_warnings = []
    for alert_type, alert_message in alerts:
        if alert_type.lower() == 'error' and fatal:
            bucket = collected_errors
        elif alert_message not in self._IGNORED_WARNINGS:
            bucket = collected_warnings
        else:
            continue
        bucket.append([alert_type, alert_message])

    for alert_type, alert_message in [*collected_warnings, *collected_errors[:-1]]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if collected_errors:
        last_message = collected_errors[-1][1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)
719
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
722
def _extract_badges(self, renderer: dict):
    """
    Collect the badges attached to a renderer.

    Every metadataBadgeRenderer under renderer['badges'] is mapped to a
    BadgeType using, in order of preference, its icon type, its style and
    finally (as a fallback) a keyword found in its label text.

    @param renderer: dict that may contain a 'badges' list
    @returns list of dicts of the form {'type': BadgeType}; badges that
             cannot be classified are left out
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label (badge_type is empty here)
                # and stop at the first keyword so a badge is only added once
                badges.append({'type': label_badge_type})
                break

    return badges
762
@staticmethod
def _has_badge(badges, badge_type):
    """Whether any entry of `badges` has the given `badge_type` as its 'type'."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)
766
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at any of the given paths.

    An object provides text either through its 'simpleText' or through the
    joined 'text' of its 'runs' (limited to the first `max_runs` runs when
    given). Without paths, `data` itself is inspected.
    Returns None if no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object rather than a list of them
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if isinstance(item, list) and not runs:
                # The item may itself be the list of runs
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 788
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at `path_list` in `data`.

    `parse_count` is tried first; if it yields None, the leading run of
    digits/commas of the whitespace-stripped text is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
796
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Build a list of thumbnail dicts ('url', 'height', 'width')
    @param path_list: paths to the levels of `data` that hold a 'thumbnails' key;
                      with no path, `data` itself is searched
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            entry_url = url_or_none(entry.get('url'))
            if entry_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in entry_url:
                    entry_url = entry_url.split('?')[0]
                collected.append({
                    'url': entry_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected
820
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time expression in a string and convert it to a dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when no expression is found or it cannot be converted
    """
    found = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not found:
        return None

    anchor = found.group('start')
    if anchor:
        return datetime_from_str(anchor)

    offset_expr = 'now-%s%s' % (found.group('time'), found.group('unit'))
    try:
        return datetime_from_str(offset_expr)
    except ValueError:
        return None
836
def _parse_time_text(self, text):
    """
    Convert a (possibly relative) time text into a unix timestamp.

    Relative expressions are tried first, then `unified_timestamp` on the
    whole text, then on a date-like fragment of the lowercased text.
    A warning is reported once if nothing works and the preferred language
    is unset or English. Returns None for empty or unparsable text.
    """
    if not text:
        return

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    unparsed = text and timestamp is None
    if unparsed and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
f3aa3c3f 856
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through `_call_api`, retrying via `RetryManager`.

    An attempt is retried (by setting `retry.error`) when:
      - the request fails with a network error that is not an HTTP error,
      - the request fails with an HTTP error whose status is not 403 or 429,
      - the reported alerts raise an error whose message contains 'unknown error',
      - `traverse_obj(response, *variadic(check_get_keys))` is falsy.
    Any other failure is passed to `_error_or_warning(e, fatal=fatal)` and
    the result of that call is returned.

    @param item_id:        id passed as `video_id` to `_call_api` and to the JSON parser
    @param check_get_keys: key path(s) given to `traverse_obj` to validate the response
    @param fatal:          forwarded to `_error_or_warning` only; the API call itself
                           is always made with fatal=True so that errors can be inspected
    @returns               the API response once an attempt passes all checks
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Peek at the start of the error body; a non-HTML body is parsed as
            # JSON to surface YouTube's own error message as a non-fatal alert
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 908
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/"""
    # A successful re.match always yields a truthy match object
    return bool(re.match(r'https?://music\.youtube\.com/', url))
912
def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer.

    Metadata is read from `renderer` itself, falling back to the reel player
    header nested under its navigation endpoint where the renderer lacks it.
    The view count is stored under 'concurrent_view_count' for live/upcoming
    entries and under 'view_count' otherwise.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    relative_nav_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_nav_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    # The remaining values are computed in the order of their keys in the result
    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # Timestamps derived from relative time text are only approximate, so they are opt-in
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status
    }
995
0c148415 996
360e1ca5 997class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 998 IE_DESC = 'YouTube'
cb7dfeea 999 _VALID_URL = r"""(?x)^
c5e8d7af 1000 (
edb53e2d 1001 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1002 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1003 (?:www\.)?deturl\.com/www\.youtube\.com|
1004 (?:www\.)?pwnyoutube\.com|
1005 (?:www\.)?hooktube\.com|
1006 (?:www\.)?yourepeat\.com|
1007 tube\.majestyc\.net|
1008 %(invidious)s|
1009 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1010 (?:.*?\#/)? # handle anchor (#/) redirect urls
1011 (?: # the various things that can precede the ID:
b6ce9bb0 1012 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1013 |(?: # or the v= param in all its forms
f7000f3a 1014 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1015 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1016 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1017 v=
1018 )
f4b05232 1019 ))
cbaed4bb
S
1020 |(?:
1021 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1022 vid\.plus| # or vid.plus/xxxx
1023 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1024 %(invidious)s
cbaed4bb 1025 )/
edb53e2d 1026 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1027 )
c5e8d7af 1028 )? # all until now is optional -> you can pass the naked ID
201c1459 1029 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1030 (?(1).+)? # if we found the ID, everything can follow
9297939e 1031 (?:\#|$)""" % {
d9190e44 1032 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1033 }
7c6eb424 1034 _EMBED_REGEX = [
1035 r'''(?x)
1036 (?:
0ca0f881 1037 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1038 data-video-url=|
1039 <embed[^>]+?src=|
1040 embedSWF\(?:\s*|
1041 <object[^>]+data=|
1042 new\s+SWFObject\(
1043 )
1044 (["\'])
1045 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1046 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1047 \1''',
1048 # https://wordpress.org/plugins/lazy-load-for-videos/
1049 r'''(?xs)
1050 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1051 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1052 ]
171a31db 1053 _RETURN_TYPE = 'video' # While there are "multifeed" test cases, they don't seem to actually exist anymore
7c6eb424 1054
e40c758c 1055 _PLAYER_INFO_RE = (
cc2db878 1056 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1057 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1058 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1059 )
2c62dc26 1060 _formats = {
c2d3cb4c 1061 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1062 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1063 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1064 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1065 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1066 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1067 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1068 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1069 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1070 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1071 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1072 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1073 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1074 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1075 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1076 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1077 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1078 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1079
1080
1081 # 3D videos
c2d3cb4c 1082 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1083 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1084 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1085 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1086 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1087 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1088 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1089
96fb5605 1090 # Apple HTTP Live Streaming
11f12195 1091 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1092 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1093 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1094 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1095 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1096 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1097 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1098 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1099
1100 # DASH mp4 video
d23028a8
S
1101 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1102 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1103 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1104 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1106 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1107 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1109 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1110 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1111 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1112 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1113
f6f1fc92 1114 # Dash mp4 audio
d23028a8
S
1115 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1116 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1117 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1118 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1119 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1120 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1121 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1122
1123 # Dash webm
d23028a8
S
1124 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1125 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1126 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1127 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1131 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1132 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1133 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1134 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1139 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1140 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1142 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1143 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1144 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1145 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1146
1147 # Dash webm audio
d23028a8
S
1148 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1149 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1150
0857baad 1151 # Dash webm audio with opus inside
d23028a8
S
1152 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1153 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1154 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1155
ce6b9a2d
PH
1156 # RTMP (unnamed)
1157 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1158
1159 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1160 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1161 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1162 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1163 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1164 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1165 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1166 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1167 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1168 }
29f7c58a 1169 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1170
fd5c4aab
S
1171 _GEO_BYPASS = False
1172
78caa52a 1173 IE_NAME = 'youtube'
2eb88d95
PH
1174 _TESTS = [
1175 {
2d3d2997 1176 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1177 'info_dict': {
1178 'id': 'BaW_jenozKc',
1179 'ext': 'mp4',
3867038a 1180 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1181 'uploader': 'Philipp Hagemeister',
1182 'uploader_id': 'phihag',
ec85ded8 1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1184 'channel': 'Philipp Hagemeister',
dd4c4492
S
1185 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1186 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1187 'upload_date': '20121002',
ff9f925b 1188 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1189 'categories': ['Science & Technology'],
3867038a 1190 'tags': ['youtube-dl'],
556dbe7f 1191 'duration': 10,
dbdaaa23 1192 'view_count': int,
3e7c1224 1193 'like_count': int,
ff9f925b 1194 'availability': 'public',
1195 'playable_in_embed': True,
1196 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1197 'live_status': 'not_live',
1198 'age_limit': 0,
7c80519c 1199 'start_time': 1,
297a564b 1200 'end_time': 9,
12a1b225 1201 'comment_count': int,
6c73052c 1202 'channel_follower_count': int
2eb88d95 1203 }
0e853ca4 1204 },
fccd3771 1205 {
4bc3a23e
PH
1206 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1207 'note': 'Embed-only video (#1746)',
1208 'info_dict': {
1209 'id': 'yZIXLfi8CZQ',
1210 'ext': 'mp4',
1211 'upload_date': '20120608',
1212 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1213 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1214 'uploader': 'SET India',
94bfcd23 1215 'uploader_id': 'setindia',
ec85ded8 1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1217 'age_limit': 18,
545cc85d 1218 },
1219 'skip': 'Private video',
fccd3771 1220 },
11b56058 1221 {
8bdd16b4 1222 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1223 'note': 'Use the first video ID in the URL',
1224 'info_dict': {
1225 'id': 'BaW_jenozKc',
1226 'ext': 'mp4',
3867038a 1227 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1228 'uploader': 'Philipp Hagemeister',
1229 'uploader_id': 'phihag',
ec85ded8 1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1231 'channel': 'Philipp Hagemeister',
1232 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1233 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1234 'upload_date': '20121002',
976ae3ea 1235 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1236 'categories': ['Science & Technology'],
3867038a 1237 'tags': ['youtube-dl'],
556dbe7f 1238 'duration': 10,
dbdaaa23 1239 'view_count': int,
11b56058 1240 'like_count': int,
976ae3ea 1241 'availability': 'public',
1242 'playable_in_embed': True,
1243 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1244 'live_status': 'not_live',
1245 'age_limit': 0,
12a1b225 1246 'comment_count': int,
6c73052c 1247 'channel_follower_count': int
34a7de29
S
1248 },
1249 'params': {
1250 'skip_download': True,
1251 },
11b56058 1252 },
dd27fd17 1253 {
2d3d2997 1254 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1255 'note': '256k DASH audio (format 141) via DASH manifest',
1256 'info_dict': {
1257 'id': 'a9LDPn-MO4I',
1258 'ext': 'm4a',
1259 'upload_date': '20121002',
1260 'uploader_id': '8KVIDEO',
ec85ded8 1261 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1262 'description': '',
1263 'uploader': '8KVIDEO',
1264 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1265 },
4bc3a23e
PH
1266 'params': {
1267 'youtube_include_dash_manifest': True,
1268 'format': '141',
4919603f 1269 },
de3c7fe0 1270 'skip': 'format 141 not served anymore',
dd27fd17 1271 },
8bdd16b4 1272 # DASH manifest with encrypted signature
1273 {
1274 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1275 'info_dict': {
1276 'id': 'IB3lcPjvWLA',
1277 'ext': 'm4a',
1278 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1279 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1280 'duration': 244,
1281 'uploader': 'AfrojackVEVO',
1282 'uploader_id': 'AfrojackVEVO',
1283 'upload_date': '20131011',
cc2db878 1284 'abr': 129.495,
976ae3ea 1285 'like_count': int,
1286 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1287 'playable_in_embed': True,
1288 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1289 'view_count': int,
1290 'track': 'The Spark',
1291 'live_status': 'not_live',
1292 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1293 'channel': 'Afrojack',
1294 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1295 'tags': 'count:19',
1296 'availability': 'public',
1297 'categories': ['Music'],
1298 'age_limit': 0,
1299 'alt_title': 'The Spark',
6c73052c 1300 'channel_follower_count': int
8bdd16b4 1301 },
1302 'params': {
1303 'youtube_include_dash_manifest': True,
1304 'format': '141/bestaudio[ext=m4a]',
1305 },
1306 },
65c2fde2 1307 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1308 {
65c2fde2 1309 'note': 'Embed allowed age-gate video',
2d3d2997 1310 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1311 'info_dict': {
1312 'id': 'HtVdAasjOgU',
1313 'ext': 'mp4',
1314 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1315 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1316 'duration': 142,
c522adb1
JMF
1317 'uploader': 'The Witcher',
1318 'uploader_id': 'WitcherGame',
ec85ded8 1319 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1320 'upload_date': '20140605',
34952f09 1321 'age_limit': 18,
976ae3ea 1322 'categories': ['Gaming'],
1323 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1324 'availability': 'needs_auth',
1325 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1326 'like_count': int,
1327 'channel': 'The Witcher',
1328 'live_status': 'not_live',
1329 'tags': 'count:17',
1330 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1331 'playable_in_embed': True,
1332 'view_count': int,
6c73052c 1333 'channel_follower_count': int
c522adb1
JMF
1334 },
1335 },
65c2fde2 1336 {
1337 'note': 'Age-gate video with embed allowed in public site',
1338 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1339 'info_dict': {
1340 'id': 'HsUATh_Nc2U',
1341 'ext': 'mp4',
1342 'title': 'Godzilla 2 (Official Video)',
1343 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1344 'upload_date': '20200408',
1345 'uploader_id': 'FlyingKitty900',
1346 'uploader': 'FlyingKitty',
1347 'age_limit': 18,
976ae3ea 1348 'availability': 'needs_auth',
1349 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1350 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1351 'channel': 'FlyingKitty',
1352 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1353 'view_count': int,
1354 'categories': ['Entertainment'],
1355 'live_status': 'not_live',
1356 'tags': ['Flyingkitty', 'godzilla 2'],
1357 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1358 'like_count': int,
1359 'duration': 177,
1360 'playable_in_embed': True,
6c73052c 1361 'channel_follower_count': int
65c2fde2 1362 },
1363 },
1364 {
1365 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1366 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1367 'info_dict': {
1368 'id': 'Tq92D6wQ1mg',
1369 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1370 'ext': 'mp4',
17322130 1371 'upload_date': '20191228',
65c2fde2 1372 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1373 'uploader': 'Projekt Melody',
1374 'description': 'md5:17eccca93a786d51bc67646756894066',
1375 'age_limit': 18,
976ae3ea 1376 'like_count': int,
1377 'availability': 'needs_auth',
1378 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1379 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1380 'view_count': int,
1381 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1382 'channel': 'Projekt Melody',
1383 'live_status': 'not_live',
1384 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1385 'playable_in_embed': True,
1386 'categories': ['Entertainment'],
1387 'duration': 106,
1388 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1389 'comment_count': int,
6c73052c 1390 'channel_follower_count': int
65c2fde2 1391 },
1392 },
1393 {
1394 'note': 'Non-Agegated non-embeddable video',
1395 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1396 'info_dict': {
1397 'id': 'MeJVWBSsPAY',
1398 'ext': 'mp4',
1399 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1400 'uploader': 'Herr Lurik',
1401 'uploader_id': 'st3in234',
1402 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1403 'upload_date': '20130730',
976ae3ea 1404 'track': 'Such mich find mich',
1405 'age_limit': 0,
1406 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1407 'like_count': int,
1408 'playable_in_embed': False,
1409 'creator': 'OOMPH!',
1410 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1411 'view_count': int,
1412 'alt_title': 'Such mich find mich',
1413 'duration': 210,
1414 'channel': 'Herr Lurik',
1415 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1416 'categories': ['Music'],
1417 'availability': 'public',
1418 'uploader_url': 'http://www.youtube.com/user/st3in234',
1419 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1420 'live_status': 'not_live',
1421 'artist': 'OOMPH!',
6c73052c 1422 'channel_follower_count': int
65c2fde2 1423 },
1424 },
1425 {
1426 'note': 'Non-bypassable age-gated video',
1427 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1428 'only_matching': True,
1429 },
8bdd16b4 1430 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1431 # YouTube Red ad is not captured for creator
1432 {
1433 'url': '__2ABJjxzNo',
1434 'info_dict': {
1435 'id': '__2ABJjxzNo',
1436 'ext': 'mp4',
1437 'duration': 266,
1438 'upload_date': '20100430',
1439 'uploader_id': 'deadmau5',
1440 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1441 'creator': 'deadmau5',
1442 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1443 'uploader': 'deadmau5',
1444 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1445 'alt_title': 'Some Chords',
976ae3ea 1446 'availability': 'public',
1447 'tags': 'count:14',
1448 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1449 'view_count': int,
1450 'live_status': 'not_live',
1451 'channel': 'deadmau5',
1452 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1453 'like_count': int,
1454 'track': 'Some Chords',
1455 'artist': 'deadmau5',
1456 'playable_in_embed': True,
1457 'age_limit': 0,
1458 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1459 'categories': ['Music'],
1460 'album': 'Some Chords',
6c73052c 1461 'channel_follower_count': int
8bdd16b4 1462 },
1463 'expected_warnings': [
1464 'DASH manifest missing',
1465 ]
1466 },
067aa17e 1467 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1468 {
1469 'url': 'lqQg6PlCWgI',
1470 'info_dict': {
1471 'id': 'lqQg6PlCWgI',
1472 'ext': 'mp4',
556dbe7f 1473 'duration': 6085,
90227264 1474 'upload_date': '20150827',
cbe2bd91 1475 'uploader_id': 'olympic',
ec85ded8 1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1477 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1478 'uploader': 'Olympics',
cbe2bd91 1479 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1480 'like_count': int,
1481 'release_timestamp': 1343767800,
1482 'playable_in_embed': True,
1483 'categories': ['Sports'],
1484 'release_date': '20120731',
1485 'channel': 'Olympics',
1486 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1487 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1488 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1489 'age_limit': 0,
1490 'availability': 'public',
1491 'live_status': 'was_live',
1492 'view_count': int,
1493 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1494 'channel_follower_count': int
cbe2bd91
PH
1495 },
1496 'params': {
1497 'skip_download': 'requires avconv',
e52a40ab 1498 }
cbe2bd91 1499 },
6271f1ca
PH
1500 # Non-square pixels
1501 {
1502 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1503 'info_dict': {
1504 'id': '_b-2C3KPAM0',
1505 'ext': 'mp4',
1506 'stretched_ratio': 16 / 9.,
556dbe7f 1507 'duration': 85,
6271f1ca
PH
1508 'upload_date': '20110310',
1509 'uploader_id': 'AllenMeow',
ec85ded8 1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1511 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1512 'uploader': '孫ᄋᄅ',
6271f1ca 1513 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1514 'playable_in_embed': True,
1515 'channel': '孫ᄋᄅ',
1516 'age_limit': 0,
1517 'tags': 'count:11',
1518 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1519 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1520 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1521 'view_count': int,
1522 'categories': ['People & Blogs'],
1523 'like_count': int,
1524 'live_status': 'not_live',
1525 'availability': 'unlisted',
12a1b225 1526 'comment_count': int,
6c73052c 1527 'channel_follower_count': int
6271f1ca 1528 },
06b491eb
S
1529 },
1530 # url_encoded_fmt_stream_map is empty string
1531 {
1532 'url': 'qEJwOuvDf7I',
1533 'info_dict': {
1534 'id': 'qEJwOuvDf7I',
f57b7835 1535 'ext': 'webm',
06b491eb
S
1536 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1537 'description': '',
1538 'upload_date': '20150404',
1539 'uploader_id': 'spbelect',
1540 'uploader': 'Наблюдатели Петербурга',
1541 },
1542 'params': {
1543 'skip_download': 'requires avconv',
e323cf3f
S
1544 },
1545 'skip': 'This live event has ended.',
06b491eb 1546 },
067aa17e 1547 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1548 {
1549 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1550 'info_dict': {
1551 'id': 'FIl7x6_3R5Y',
eb6793ba 1552 'ext': 'webm',
da77d856
S
1553 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1554 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1555 'duration': 220,
da77d856
S
1556 'upload_date': '20150625',
1557 'uploader_id': 'dorappi2000',
ec85ded8 1558 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1559 'uploader': 'dorappi2000',
eb6793ba 1560 'formats': 'mincount:31',
da77d856 1561 },
eb6793ba 1562 'skip': 'not actual anymore',
2ee8f5d8 1563 },
8a1a26ce
YCH
1564 # DASH manifest with segment_list
1565 {
1566 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1567 'md5': '8ce563a1d667b599d21064e982ab9e31',
1568 'info_dict': {
1569 'id': 'CsmdDsKjzN8',
1570 'ext': 'mp4',
17ee98e1 1571 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1572 'uploader': 'Airtek',
1573 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1574 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1575 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1576 },
1577 'params': {
1578 'youtube_include_dash_manifest': True,
1579 'format': '135', # bestvideo
be49068d
S
1580 },
1581 'skip': 'This live event has ended.',
2ee8f5d8 1582 },
cf7e015f
S
1583 {
1584 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1585 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1586 'info_dict': {
545cc85d 1587 'id': 'jvGDaLqkpTg',
1588 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1589 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1590 },
1591 'playlist': [{
1592 'info_dict': {
545cc85d 1593 'id': 'jvGDaLqkpTg',
cf7e015f 1594 'ext': 'mp4',
545cc85d 1595 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1596 'description': 'md5:e03b909557865076822aa169218d6a5d',
1597 'duration': 10643,
1598 'upload_date': '20161111',
1599 'uploader': 'Team PGP',
1600 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1601 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1602 },
1603 }, {
1604 'info_dict': {
545cc85d 1605 'id': '3AKt1R1aDnw',
cf7e015f 1606 'ext': 'mp4',
545cc85d 1607 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1608 'description': 'md5:e03b909557865076822aa169218d6a5d',
1609 'duration': 10991,
1610 'upload_date': '20161111',
1611 'uploader': 'Team PGP',
1612 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1614 },
1615 }, {
1616 'info_dict': {
545cc85d 1617 'id': 'RtAMM00gpVc',
cf7e015f 1618 'ext': 'mp4',
545cc85d 1619 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1620 'description': 'md5:e03b909557865076822aa169218d6a5d',
1621 'duration': 10995,
1622 'upload_date': '20161111',
1623 'uploader': 'Team PGP',
1624 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1625 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1626 },
1627 }, {
1628 'info_dict': {
545cc85d 1629 'id': '6N2fdlP3C5U',
cf7e015f 1630 'ext': 'mp4',
545cc85d 1631 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1632 'description': 'md5:e03b909557865076822aa169218d6a5d',
1633 'duration': 10990,
1634 'upload_date': '20161111',
1635 'uploader': 'Team PGP',
1636 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1638 },
1639 }],
1640 'params': {
1641 'skip_download': True,
1642 },
65c2fde2 1643 'skip': 'Not multifeed anymore',
cbaed4bb 1644 },
f9f49d87 1645 {
067aa17e 1646 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1647 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1648 'info_dict': {
1649 'id': 'gVfLd0zydlo',
1650 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1651 },
1652 'playlist_count': 2,
be49068d 1653 'skip': 'Not multifeed anymore',
f9f49d87 1654 },
cbaed4bb 1655 {
2d3d2997 1656 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1657 'only_matching': True,
0e49d9a6 1658 },
6d4fc66b 1659 {
2d3d2997 1660 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1661 'only_matching': True,
1662 },
0e49d9a6 1663 {
067aa17e 1664 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1665 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1666 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1667 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1668 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1669 'info_dict': {
1670 'id': 'lsguqyKfVQg',
1671 'ext': 'mp4',
1672 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1673 'alt_title': 'Dark Walk',
0e49d9a6 1674 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1675 'duration': 133,
0e49d9a6
LL
1676 'upload_date': '20151119',
1677 'uploader_id': 'IronSoulElf',
ec85ded8 1678 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1679 'uploader': 'IronSoulElf',
11f9be09 1680 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1681 'track': 'Dark Walk',
1682 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1683 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1684 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1685 'categories': ['Film & Animation'],
1686 'view_count': int,
1687 'live_status': 'not_live',
1688 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1689 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1690 'tags': 'count:13',
1691 'availability': 'public',
1692 'channel': 'IronSoulElf',
1693 'playable_in_embed': True,
1694 'like_count': int,
1695 'age_limit': 0,
6c73052c 1696 'channel_follower_count': int
0e49d9a6
LL
1697 },
1698 'params': {
1699 'skip_download': True,
1700 },
1701 },
61f92af1 1702 {
067aa17e 1703 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1704 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1705 'only_matching': True,
1706 },
313dfc45
LL
1707 {
1708 # Video with yt:stretch=17:0
1709 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1710 'info_dict': {
1711 'id': 'Q39EVAstoRM',
1712 'ext': 'mp4',
1713 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1714 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1715 'upload_date': '20151107',
1716 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1717 'uploader': 'CH GAMER DROID',
1718 },
1719 'params': {
1720 'skip_download': True,
1721 },
be49068d 1722 'skip': 'This video does not exist.',
313dfc45 1723 },
201c1459 1724 {
1725 # Video with incomplete 'yt:stretch=16:'
1726 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1727 'only_matching': True,
1728 },
7caf9830
S
1729 {
1730 # Video licensed under Creative Commons
1731 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1732 'info_dict': {
1733 'id': 'M4gD1WSo5mA',
1734 'ext': 'mp4',
1735 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1736 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1737 'duration': 721,
17322130 1738 'upload_date': '20150128',
7caf9830 1739 'uploader_id': 'BerkmanCenter',
ec85ded8 1740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1741 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1742 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1743 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1744 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1745 'like_count': int,
1746 'age_limit': 0,
1747 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1748 'channel': 'The Berkman Klein Center for Internet & Society',
1749 'availability': 'public',
1750 'view_count': int,
1751 'categories': ['Education'],
1752 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1753 'live_status': 'not_live',
1754 'playable_in_embed': True,
12a1b225 1755 'comment_count': int,
d5d1df8a 1756 'channel_follower_count': int,
1757 'chapters': list,
7caf9830
S
1758 },
1759 'params': {
1760 'skip_download': True,
1761 },
1762 },
fd050249
S
1763 {
1764 # Channel-like uploader_url
1765 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1766 'info_dict': {
1767 'id': 'eQcmzGIKrzg',
1768 'ext': 'mp4',
1769 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1770 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1771 'duration': 4060,
17322130 1772 'upload_date': '20151120',
eb6793ba 1773 'uploader': 'Bernie Sanders',
fd050249 1774 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1775 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1776 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1777 'playable_in_embed': True,
1778 'tags': 'count:12',
1779 'like_count': int,
1780 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1781 'age_limit': 0,
1782 'availability': 'public',
1783 'categories': ['News & Politics'],
1784 'channel': 'Bernie Sanders',
1785 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1786 'view_count': int,
1787 'live_status': 'not_live',
1788 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1789 'comment_count': int,
d5d1df8a 1790 'channel_follower_count': int,
1791 'chapters': list,
fd050249
S
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
1796 },
040ac686
S
1797 {
1798 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1799 'only_matching': True,
7f29cf54
S
1800 },
1801 {
067aa17e 1802 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1803 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1804 'only_matching': True,
6496ccb4
S
1805 },
1806 {
1807 # Rental video preview
1808 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1809 'info_dict': {
1810 'id': 'uGpuVWrhIzE',
1811 'ext': 'mp4',
1812 'title': 'Piku - Trailer',
1813 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1814 'upload_date': '20150811',
1815 'uploader': 'FlixMatrix',
1816 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1817 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1818 'license': 'Standard YouTube License',
1819 },
1820 'params': {
1821 'skip_download': True,
1822 },
eb6793ba 1823 'skip': 'This video is not available.',
022a5d66 1824 },
12afdc2a
S
1825 {
1826 # YouTube Red video with episode data
1827 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1828 'info_dict': {
1829 'id': 'iqKdEhx-dD4',
1830 'ext': 'mp4',
1831 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1832 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1833 'duration': 2085,
12afdc2a
S
1834 'upload_date': '20170118',
1835 'uploader': 'Vsauce',
1836 'uploader_id': 'Vsauce',
1837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1838 'series': 'Mind Field',
1839 'season_number': 1,
1840 'episode_number': 1,
976ae3ea 1841 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1842 'tags': 'count:12',
1843 'view_count': int,
1844 'availability': 'public',
1845 'age_limit': 0,
1846 'channel': 'Vsauce',
1847 'episode': 'Episode 1',
1848 'categories': ['Entertainment'],
1849 'season': 'Season 1',
1850 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1851 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1852 'like_count': int,
1853 'playable_in_embed': True,
1854 'live_status': 'not_live',
6c73052c 1855 'channel_follower_count': int
12afdc2a
S
1856 },
1857 'params': {
1858 'skip_download': True,
1859 },
1860 'expected_warnings': [
1861 'Skipping DASH manifest',
1862 ],
1863 },
c7121fa7
S
1864 {
1865 # The following content has been identified by the YouTube community
1866 # as inappropriate or offensive to some audiences.
1867 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1868 'info_dict': {
1869 'id': '6SJNVb0GnPI',
1870 'ext': 'mp4',
1871 'title': 'Race Differences in Intelligence',
1872 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1873 'duration': 965,
1874 'upload_date': '20140124',
1875 'uploader': 'New Century Foundation',
1876 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1878 },
1879 'params': {
1880 'skip_download': True,
1881 },
545cc85d 1882 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1883 },
022a5d66
S
1884 {
1885 # itag 212
1886 'url': '1t24XAntNCY',
1887 'only_matching': True,
fd5c4aab
S
1888 },
1889 {
1890 # geo restricted to JP
1891 'url': 'sJL6WA-aGkQ',
1892 'only_matching': True,
1893 },
cd5a74a2
S
1894 {
1895 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1896 'only_matching': True,
1897 },
bc2ca1bb 1898 {
1899 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1900 'only_matching': True,
1901 },
1902 {
1903 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1904 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1905 'only_matching': True,
1906 },
825cd268
RA
1907 {
1908 # DRM protected
1909 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1910 'only_matching': True,
4fe54c12
S
1911 },
1912 {
1913 # Video with unsupported adaptive stream type formats
1914 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1915 'info_dict': {
1916 'id': 'Z4Vy8R84T1U',
1917 'ext': 'mp4',
1918 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1919 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1920 'duration': 433,
1921 'upload_date': '20130923',
1922 'uploader': 'Amelia Putri Harwita',
1923 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1925 'formats': 'maxcount:10',
1926 },
1927 'params': {
1928 'skip_download': True,
1929 'youtube_include_dash_manifest': False,
1930 },
5429d6a9 1931 'skip': 'not actual anymore',
5caabd3c 1932 },
1933 {
822b9d9c 1934 # YouTube Music auto-generated description
5caabd3c 1935 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1936 'info_dict': {
1937 'id': 'MgNrAu2pzNs',
1938 'ext': 'mp4',
1939 'title': 'Voyeur Girl',
1940 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1941 'upload_date': '20190312',
5429d6a9
S
1942 'uploader': 'Stephen - Topic',
1943 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1944 'artist': 'Stephen',
1945 'track': 'Voyeur Girl',
1946 'album': 'it\'s too much love to know my dear',
1947 'release_date': '20190313',
1948 'release_year': 2019,
976ae3ea 1949 'alt_title': 'Voyeur Girl',
1950 'view_count': int,
1951 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1952 'playable_in_embed': True,
1953 'like_count': int,
1954 'categories': ['Music'],
1955 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1956 'channel': 'Stephen',
1957 'availability': 'public',
1958 'creator': 'Stephen',
1959 'duration': 169,
1960 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1961 'age_limit': 0,
1962 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1963 'tags': 'count:11',
1964 'live_status': 'not_live',
6c73052c 1965 'channel_follower_count': int
5caabd3c 1966 },
1967 'params': {
1968 'skip_download': True,
1969 },
1970 },
66b48727
RA
1971 {
1972 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1973 'only_matching': True,
1974 },
011e75e6
S
1975 {
1976 # invalid -> valid video id redirection
1977 'url': 'DJztXj2GPfl',
1978 'info_dict': {
1979 'id': 'DJztXj2GPfk',
1980 'ext': 'mp4',
1981 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1982 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1983 'upload_date': '20090125',
1984 'uploader': 'Prochorowka',
1985 'uploader_id': 'Prochorowka',
1986 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1987 'artist': 'Panjabi MC',
1988 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1989 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1990 },
1991 'params': {
1992 'skip_download': True,
1993 },
545cc85d 1994 'skip': 'Video unavailable',
ea74e00b
DP
1995 },
1996 {
1997 # empty description results in an empty string
1998 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1999 'info_dict': {
2000 'id': 'x41yOUIvK2k',
2001 'ext': 'mp4',
2002 'title': 'IMG 3456',
2003 'description': '',
2004 'upload_date': '20170613',
2005 'uploader_id': 'ElevageOrVert',
2006 'uploader': 'ElevageOrVert',
976ae3ea 2007 'view_count': int,
2008 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2009 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2010 'like_count': int,
2011 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2012 'tags': [],
2013 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2014 'availability': 'public',
2015 'age_limit': 0,
2016 'categories': ['Pets & Animals'],
2017 'duration': 7,
2018 'playable_in_embed': True,
2019 'live_status': 'not_live',
2020 'channel': 'ElevageOrVert',
6c73052c 2021 'channel_follower_count': int
ea74e00b
DP
2022 },
2023 'params': {
2024 'skip_download': True,
2025 },
2026 },
a0566bbf 2027 {
29f7c58a 2028 # with '};' inside yt initial data (see [1])
2029 # see [2] for an example with '};' inside ytInitialPlayerResponse
2030 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2031 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2032 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2033 'info_dict': {
2034 'id': 'CHqg6qOn4no',
2035 'ext': 'mp4',
2036 'title': 'Part 77 Sort a list of simple types in c#',
2037 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2038 'upload_date': '20130831',
2039 'uploader_id': 'kudvenkat',
2040 'uploader': 'kudvenkat',
976ae3ea 2041 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2042 'like_count': int,
2043 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2044 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2045 'live_status': 'not_live',
2046 'categories': ['Education'],
2047 'availability': 'public',
2048 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2049 'tags': 'count:12',
2050 'playable_in_embed': True,
2051 'age_limit': 0,
2052 'view_count': int,
2053 'duration': 522,
2054 'channel': 'kudvenkat',
12a1b225 2055 'comment_count': int,
d5d1df8a 2056 'channel_follower_count': int,
2057 'chapters': list,
a0566bbf 2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
29f7c58a 2063 {
2064 # another example of '};' in ytInitialData
2065 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2066 'only_matching': True,
2067 },
2068 {
2069 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2070 'only_matching': True,
2071 },
545cc85d 2072 {
cc2db878 2073 # https://github.com/ytdl-org/youtube-dl/pull/28094
2074 'url': 'OtqTfy26tG0',
2075 'info_dict': {
2076 'id': 'OtqTfy26tG0',
2077 'ext': 'mp4',
2078 'title': 'Burn Out',
2079 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2080 'upload_date': '20141120',
2081 'uploader': 'The Cinematic Orchestra - Topic',
2082 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2083 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2084 'artist': 'The Cinematic Orchestra',
2085 'track': 'Burn Out',
2086 'album': 'Every Day',
976ae3ea 2087 'like_count': int,
2088 'live_status': 'not_live',
2089 'alt_title': 'Burn Out',
2090 'duration': 614,
2091 'age_limit': 0,
2092 'view_count': int,
2093 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2094 'creator': 'The Cinematic Orchestra',
2095 'channel': 'The Cinematic Orchestra',
2096 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2097 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2098 'availability': 'public',
2099 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2100 'categories': ['Music'],
2101 'playable_in_embed': True,
6c73052c 2102 'channel_follower_count': int
cc2db878 2103 },
2104 'params': {
2105 'skip_download': True,
2106 },
545cc85d 2107 },
bc2ca1bb 2108 {
2109 # controversial video, only works with bpctr when authenticated with cookies
2110 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2111 'only_matching': True,
2112 },
a1a7907b 2113 {
2114 # controversial video, requires bpctr/contentCheckOk
2115 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2116 'info_dict': {
2117 'id': 'SZJvDhaSDnc',
2118 'ext': 'mp4',
2119 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2120 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2121 'uploader': 'CBS Mornings',
11f9be09 2122 'uploader_id': 'CBSThisMorning',
a1a7907b 2123 'upload_date': '20140716',
976ae3ea 2124 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2125 'duration': 170,
2126 'categories': ['News & Politics'],
2127 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2128 'view_count': int,
2129 'channel': 'CBS Mornings',
2130 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2131 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2132 'age_limit': 18,
2133 'availability': 'needs_auth',
2134 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2135 'like_count': int,
2136 'live_status': 'not_live',
2137 'playable_in_embed': True,
6c73052c 2138 'channel_follower_count': int
a1a7907b 2139 }
2140 },
f7ad7160 2141 {
2142 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2143 'url': 'cBvYw8_A0vQ',
2144 'info_dict': {
2145 'id': 'cBvYw8_A0vQ',
2146 'ext': 'mp4',
2147 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2148 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2149 'upload_date': '20201120',
2150 'uploader': 'Walk around Japan',
2151 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2153 'duration': 1456,
2154 'categories': ['Travel & Events'],
2155 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2156 'view_count': int,
2157 'channel': 'Walk around Japan',
2158 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2159 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2160 'age_limit': 0,
2161 'availability': 'public',
2162 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2163 'live_status': 'not_live',
2164 'playable_in_embed': True,
6c73052c 2165 'channel_follower_count': int
f7ad7160 2166 },
2167 'params': {
2168 'skip_download': True,
2169 },
0fb983f6 2170 }, {
2171 # Has multiple audio streams
2172 'url': 'WaOKSUlf4TM',
2173 'only_matching': True
9297939e 2174 }, {
2175 # Requires Premium: has format 141 when requested using YTM url
2176 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2177 'only_matching': True
2178 }, {
120916da 2179 # multiple subtitles with same lang_code
2180 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2181 'only_matching': True,
109dd3b2 2182 }, {
2183 # Force use of the android client fallback
2184 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2185 'info_dict': {
2186 'id': 'YOelRv7fMxY',
11f9be09 2187 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2188 'ext': '3gp',
2189 'upload_date': '20210624',
2190 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2191 'uploader': 'colinfurze',
11f9be09 2192 'uploader_id': 'colinfurze',
109dd3b2 2193 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2194 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2195 'duration': 596,
2196 'categories': ['Entertainment'],
2197 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2198 'view_count': int,
2199 'channel': 'colinfurze',
2200 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2201 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2202 'age_limit': 0,
2203 'availability': 'public',
2204 'like_count': int,
2205 'live_status': 'not_live',
2206 'playable_in_embed': True,
d5d1df8a 2207 'channel_follower_count': int,
2208 'chapters': list,
109dd3b2 2209 },
2210 'params': {
2211 'format': '17', # 3gp format available on android
2212 'extractor_args': {'youtube': {'player_client': ['android']}},
2213 },
120916da 2214 },
109dd3b2 2215 {
2216 # Skip download of additional client configs (remix client config in this case)
2217 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2218 'only_matching': True,
2219 'params': {
2220 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2221 },
8fc54b12 2222 }, {
2223 # shorts
2224 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2225 'only_matching': True,
9222c381 2226 }, {
2227 'note': 'Storyboards',
2228 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2229 'info_dict': {
2230 'id': '5KLPxDtMqe8',
2231 'ext': 'mhtml',
2232 'format_id': 'sb0',
2233 'title': 'Your Brain is Plastic',
2234 'uploader_id': 'scishow',
2235 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2236 'upload_date': '20140324',
2237 'uploader': 'SciShow',
976ae3ea 2238 'like_count': int,
2239 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2240 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2241 'view_count': int,
2242 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2243 'playable_in_embed': True,
2244 'tags': 'count:12',
2245 'uploader_url': 'http://www.youtube.com/user/scishow',
2246 'availability': 'public',
2247 'channel': 'SciShow',
2248 'live_status': 'not_live',
2249 'duration': 248,
2250 'categories': ['Education'],
2251 'age_limit': 0,
d5d1df8a 2252 'channel_follower_count': int,
2253 'chapters': list,
9222c381 2254 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2255 }, {
2256 # Ensure video upload_date is in UTC timezone (video was uploaded at Unix timestamp 1641170939)
2257 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2258 'info_dict': {
2259 'id': '2NUZ8W2llS4',
2260 'ext': 'mp4',
2261 'title': 'The NP that test your phone performance 🙂',
2262 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2263 'uploader': 'Leon Nguyen',
2264 'uploader_id': 'VNSXIII',
2265 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2266 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2267 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2268 'duration': 21,
2269 'view_count': int,
2270 'age_limit': 0,
2271 'categories': ['Gaming'],
2272 'tags': 'count:23',
2273 'playable_in_embed': True,
2274 'live_status': 'not_live',
2275 'upload_date': '20220103',
2276 'like_count': int,
2277 'availability': 'public',
2278 'channel': 'Leon Nguyen',
2279 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2280 'comment_count': int,
992f9a73 2281 'channel_follower_count': int
2282 }
1ff88b7a 2283 }, {
2284 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2285 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2286 'info_dict': {
2287 'id': '2NUZ8W2llS4',
2288 'ext': 'mp4',
2289 'title': 'The NP that test your phone performance 🙂',
2290 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2291 'uploader': 'Leon Nguyen',
2292 'uploader_id': 'VNSXIII',
2293 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2294 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2295 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2296 'duration': 21,
2297 'view_count': int,
2298 'age_limit': 0,
2299 'categories': ['Gaming'],
2300 'tags': 'count:23',
2301 'playable_in_embed': True,
2302 'live_status': 'not_live',
2303 'upload_date': '20220102',
2304 'like_count': int,
2305 'availability': 'public',
2306 'channel': 'Leon Nguyen',
2307 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2308 'comment_count': int,
2309 'channel_follower_count': int
2310 },
2311 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2312 }, {
2313 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2314 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2315 'info_dict': {
2316 'id': 'mzZzzBU6lrM',
2317 'ext': 'mp4',
2318 'title': 'I Met GeorgeNotFound In Real Life...',
2319 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2320 'uploader': 'Quackity',
2321 'uploader_id': 'QuackityHQ',
2322 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2323 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2324 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2325 'duration': 955,
2326 'view_count': int,
2327 'age_limit': 0,
2328 'categories': ['Entertainment'],
2329 'tags': 'count:26',
2330 'playable_in_embed': True,
2331 'live_status': 'not_live',
2332 'release_timestamp': 1641172509,
2333 'release_date': '20220103',
2334 'upload_date': '20220103',
2335 'like_count': int,
2336 'availability': 'public',
2337 'channel': 'Quackity',
2338 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2339 'channel_follower_count': int
2340 }
2341 },
2342 { # continuous livestream. Microformat upload date should be preferred.
2343 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2344 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2345 'info_dict': {
2346 'id': 'kgx4WGK0oNU',
2347 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2348 'ext': 'mp4',
2349 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2350 'availability': 'public',
2351 'age_limit': 0,
2352 'release_timestamp': 1637975704,
2353 'upload_date': '20210619',
2354 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2355 'live_status': 'is_live',
2356 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2357 'uploader': '阿鲍Abao',
2358 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2359 'channel': 'Abao in Tokyo',
2360 'channel_follower_count': int,
2361 'release_date': '20211127',
2362 'tags': 'count:39',
2363 'categories': ['People & Blogs'],
2364 'like_count': int,
2365 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2366 'view_count': int,
2367 'playable_in_embed': True,
2368 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2369 'concurrent_view_count': int,
992f9a73 2370 },
2371 'params': {'skip_download': True}
6e634cbe 2372 }, {
2373 # Story. Requires specific player params to work.
ee27297f 2374 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2375 'info_dict': {
ee27297f 2376 'id': 'vv8qTUWmulI',
6e634cbe 2377 'ext': 'mp4',
ee27297f 2378 'availability': 'unlisted',
2379 'view_count': int,
2380 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2381 'upload_date': '20220526',
2382 'categories': ['Education'],
2383 'title': 'Story',
2384 'channel': 'IT\'S HISTORY',
2385 'description': '',
2386 'uploader_id': 'BlastfromthePast',
2387 'duration': 12,
2388 'uploader': 'IT\'S HISTORY',
6e634cbe 2389 'playable_in_embed': True,
6e634cbe 2390 'age_limit': 0,
6e634cbe 2391 'live_status': 'not_live',
ee27297f 2392 'tags': [],
2393 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2394 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2395 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2396 },
2397 'skip': 'stories get removed after some period of time',
ee27297f 2398 }, {
2399 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2400 'info_dict': {
2401 'id': 'tjjjtzRLHvA',
2402 'ext': 'mp4',
2403 'title': 'ハッシュタグ無し };if window.ytcsi',
2404 'upload_date': '20220323',
2405 'like_count': int,
2406 'availability': 'unlisted',
2407 'channel': 'nao20010128nao',
2408 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2409 'age_limit': 0,
2410 'uploader': 'nao20010128nao',
2411 'uploader_id': 'nao20010128nao',
2412 'categories': ['Music'],
6e634cbe 2413 'view_count': int,
2414 'description': '',
ee27297f 2415 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2416 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2417 'live_status': 'not_live',
2418 'playable_in_embed': True,
2419 'channel_follower_count': int,
2420 'duration': 6,
2421 'tags': [],
2422 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2423 }
c26f9b99 2424 }, {
2425 # Prefer primary title+description language metadata by default
2426 # Do not prefer translated description if primary is empty
2427 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2428 'info_dict': {
2429 'id': 'el3E4MbxRqQ',
2430 'ext': 'mp4',
2431 'title': 'dlp test video 2 - primary sv no desc',
2432 'description': '',
2433 'channel': 'cole-dlp-test-acc',
2434 'tags': [],
2435 'view_count': int,
2436 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2437 'like_count': int,
2438 'playable_in_embed': True,
2439 'availability': 'unlisted',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2441 'age_limit': 0,
2442 'duration': 5,
2443 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2444 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2445 'live_status': 'not_live',
2446 'upload_date': '20220908',
2447 'categories': ['People & Blogs'],
2448 'uploader': 'cole-dlp-test-acc',
2449 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2450 },
2451 'params': {'skip_download': True}
2452 }, {
2453 # Extractor argument: prefer translated title+description
2454 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2455 'info_dict': {
2456 'id': 'gHKT4uU8Zng',
2457 'ext': 'mp4',
2458 'channel': 'cole-dlp-test-acc',
2459 'tags': [],
2460 'duration': 5,
2461 'live_status': 'not_live',
2462 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2463 'upload_date': '20220728',
2464 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2465 'view_count': int,
2466 'categories': ['People & Blogs'],
2467 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2468 'title': 'dlp test video title translated (fr)',
2469 'availability': 'public',
2470 'uploader': 'cole-dlp-test-acc',
2471 'age_limit': 0,
2472 'description': 'dlp test video description translated (fr)',
2473 'playable_in_embed': True,
2474 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2475 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2476 },
2477 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2478 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2479 }, {
2480 'note': '6 channel audio',
2481 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2482 'only_matching': True,
6e634cbe 2483 }
2eb88d95
PH
2484 ]
2485
f2e8dbcc 2486 _WEBPAGE_TESTS = [
2487 # YouTube <object> embed
2488 {
2489 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2490 'md5': '873c81d308b979f0e23ee7e620b312a3',
2491 'info_dict': {
2492 'id': 'msN87y-iEx0',
2493 'ext': 'mp4',
2494 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2495 'upload_date': '20080526',
2496 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2497 'uploader': 'Christopher Sykes',
2498 'uploader_id': 'ChristopherJSykes',
2499 'age_limit': 0,
2500 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2501 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2502 'playable_in_embed': True,
2503 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2504 'like_count': int,
2505 'comment_count': int,
2506 'channel': 'Christopher Sykes',
2507 'live_status': 'not_live',
2508 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2509 'availability': 'public',
2510 'duration': 195,
2511 'view_count': int,
2512 'categories': ['Science & Technology'],
2513 'channel_follower_count': int,
2514 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2515 },
2516 'params': {
2517 'skip_download': True,
2518 }
2519 },
2520 ]
2521
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle `url`.

    URLs whose query string carries a non-empty `list` parameter are
    rejected outright; everything else is decided by the parent class.
    """
    # Import kept local to the method, exactly as in the original
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2530
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (filled by _load_player)
    # _player_cache: cache-id tuple -> result or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2535
4d37720a 2536 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
adbc4ec4 2537 lock = threading.Lock()
185bf310 2538 start_time = time.time()
adbc4ec4
THD
2539 formats = [f for f in formats if f.get('is_from_start')]
2540
185bf310 2541 def refetch_manifest(format_id, delay):
2542 nonlocal formats, start_time, is_live
2543 if time.time() <= start_time + delay:
adbc4ec4
THD
2544 return
2545
2546 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2547 video_details = traverse_obj(
2548 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2549 microformats = traverse_obj(
2550 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2551 expected_type=dict, default=[])
4d37720a
L
2552 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2553 is_live = live_status == 'is_live'
185bf310 2554 start_time = time.time()
adbc4ec4 2555
185bf310 2556 def mpd_feed(format_id, delay):
adbc4ec4
THD
2557 """
2558 @returns (manifest_url, manifest_stream_number, is_live) or None
2559 """
2560 with lock:
185bf310 2561 refetch_manifest(format_id, delay)
adbc4ec4
THD
2562
2563 f = next((f for f in formats if f['format_id'] == format_id), None)
2564 if not f:
185bf310 2565 if not is_live:
2566 self.to_screen(f'{video_id}: Video is no longer live')
2567 else:
2568 self.report_warning(
2569 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2570 return None
2571 return f['manifest_url'], f['manifest_stream_number'], is_live
2572
2573 for f in formats:
4d37720a
L
2574 f['is_live'] = is_live
2575 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
2576 live_start_time, mpd_feed, not is_live and f.copy())
2577 if is_live:
2578 f['fragments'] = gen
2579 f['protocol'] = 'http_dash_segments_generator'
2580 else:
2581 f['fragments'] = LazyList(gen({}))
2582 del f['is_from_start']
adbc4ec4 2583
4d37720a 2584 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
adbc4ec4
THD
2585 FETCH_SPAN, MAX_DURATION = 5, 432000
2586
2587 mpd_url, stream_number, is_live = None, None, True
2588
2589 begin_index = 0
2590 download_start_time = ctx.get('start') or time.time()
2591
2592 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2593 if lack_early_segments:
2594 self.report_warning(bug_reports_message(
2595 'Starting download from the last 120 hours of the live stream since '
2596 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2597 lack_early_segments = True
2598
2599 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2600 fragments, fragment_base_url = None, None
2601
a539f065 2602 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2603 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2604 # Obtain from MPD's maximum seq value
2605 old_mpd_url = mpd_url
185bf310 2606 last_error = ctx.pop('last_error', None)
14f25df2 2607 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2608 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2609 or (mpd_url, stream_number, False))
2610 if not refresh_sequence:
2611 if expire_fast and not is_live:
2612 return False, last_seq
2613 elif old_mpd_url == mpd_url:
2614 return True, last_seq
4d37720a
L
2615 if manifestless_orig_fmt:
2616 fmt_info = manifestless_orig_fmt
2617 else:
2618 try:
2619 fmts, _ = self._extract_mpd_formats_and_subtitles(
2620 mpd_url, None, note=False, errnote=False, fatal=False)
2621 except ExtractorError:
2622 fmts = None
2623 if not fmts:
2624 no_fragment_score += 2
2625 return False, last_seq
2626 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
adbc4ec4
THD
2627 fragments = fmt_info['fragments']
2628 fragment_base_url = fmt_info['fragment_base_url']
2629 assert fragment_base_url
2630
2631 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2632 return True, _last_seq
2633
4d37720a 2634 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
adbc4ec4
THD
2635 while is_live:
2636 fetch_time = time.time()
2637 if no_fragment_score > 30:
2638 return
2639 if last_segment_url:
2640 # Obtain from "X-Head-Seqnum" header value from each segment
2641 try:
2642 urlh = self._request_webpage(
2643 last_segment_url, None, note=False, errnote=False, fatal=False)
2644 except ExtractorError:
2645 urlh = None
2646 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2647 if last_seq is None:
a539f065 2648 no_fragment_score += 2
adbc4ec4
THD
2649 last_segment_url = None
2650 continue
2651 else:
a539f065
LNO
2652 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2653 no_fragment_score += 2
185bf310 2654 if not should_continue:
adbc4ec4
THD
2655 continue
2656
2657 if known_idx > last_seq:
2658 last_segment_url = None
2659 continue
2660
2661 last_seq += 1
2662
2663 if begin_index < 0 and known_idx < 0:
2664 # skip from the start when it's negative value
2665 known_idx = last_seq + begin_index
2666 if lack_early_segments:
2667 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2668 try:
2669 for idx in range(known_idx, last_seq):
2670 # do not update sequence here or you'll get skipped some part of it
a539f065 2671 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2672 if not should_continue:
adbc4ec4
THD
2673 known_idx = idx - 1
2674 raise ExtractorError('breaking out of outer loop')
2675 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2676 yield {
2677 'url': last_segment_url,
36195c44 2678 'fragment_count': last_seq,
adbc4ec4
THD
2679 }
2680 if known_idx == last_seq:
2681 no_fragment_score += 5
2682 else:
2683 no_fragment_score = 0
2684 known_idx = last_seq
2685 except ExtractorError:
2686 continue
2687
4d37720a
L
2688 if manifestless_orig_fmt:
2689 # Stop at the first iteration if running for post-live manifestless;
2690 # fragment count no longer increase since it starts
2691 break
2692
adbc4ec4
THD
2693 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2694
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks for 'PLAYER_JS_URL' first, then for 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS'. `webpage` is accepted but not used here.
    """
    found_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if found_url:
        return urljoin('https://www.youtube.com', found_url)
    return None
109dd3b2 2702
b6de707d 2703 def _download_player_url(self, video_id, fatal=False):
2704 res = self._download_webpage(
2705 'https://www.youtube.com/iframe_api',
2706 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2707 if res:
2708 player_version = self._search_regex(
2709 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2710 if player_version:
2711 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2712
60064c53
PH
2713 def _signature_cache_id(self, example_sig):
2714 """ Return a string representation of a signature """
14f25df2 2715 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2716
e40c758c
S
2717 @classmethod
2718 def _extract_player_info(cls, player_url):
2719 for player_re in cls._PLAYER_INFO_RE:
2720 id_m = re.search(player_re, player_url)
2721 if id_m:
2722 break
2723 else:
c081b35c 2724 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2725 return id_m.group('id')
e40c758c 2726
404f611f 2727 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2728 player_id = self._extract_player_info(player_url)
2729 if player_id not in self._code_cache:
1276a43a 2730 code = self._download_webpage(
109dd3b2 2731 player_url, video_id, fatal=fatal,
2732 note='Downloading player ' + player_id,
2733 errnote='Download of %s failed' % player_url)
1276a43a 2734 if code:
2735 self._code_cache[player_id] = code
404f611f 2736 return self._code_cache.get(player_id)
109dd3b2 2737
e40c758c 2738 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2739 player_id = self._extract_player_info(player_url)
e0df6211 2740
c4417ddb 2741 # Read from filesystem cache
86e5f3ed 2742 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2743 assert os.path.basename(func_id) == func_id
a0e07d31 2744
ae61d108 2745 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2746 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2747
580ce007 2748 if not cache_spec:
2749 code = self._load_player(video_id, player_url)
404f611f 2750 if code:
109dd3b2 2751 res = self._parse_sig_js(code)
ac668111 2752 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2753 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2754 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2755
2756 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2757
60064c53 2758 def _print_sig_code(self, func, example_sig):
404f611f 2759 if not self.get_param('youtube_print_sig_code'):
2760 return
2761
edf3e38e
PH
2762 def gen_sig_code(idxs):
2763 def _genslice(start, end, step):
78caa52a 2764 starts = '' if start == 0 else str(start)
8bcc8756 2765 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2766 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2767 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2768
2769 step = None
7af808a5
PH
2770 # Quelch pyflakes warnings - start will be set when step is set
2771 start = '(Never used)'
edf3e38e
PH
2772 for i, prev in zip(idxs[1:], idxs[:-1]):
2773 if step is not None:
2774 if i - prev == step:
2775 continue
2776 yield _genslice(start, prev, step)
2777 step = None
2778 continue
2779 if i - prev in [-1, 1]:
2780 step = i - prev
2781 start = prev
2782 continue
2783 else:
78caa52a 2784 yield 's[%d]' % prev
edf3e38e 2785 if step is None:
78caa52a 2786 yield 's[%d]' % i
edf3e38e
PH
2787 else:
2788 yield _genslice(start, i, step)
2789
ac668111 2790 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2791 cache_res = func(test_string)
edf3e38e 2792 cache_spec = [ord(c) for c in cache_res]
78caa52a 2793 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2794 signature_id_tuple = '(%s)' % (
14f25df2 2795 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2796 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2797 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2798 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2799
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper for it.

    The returned callable takes the signature string and passes it as the
    single argument of the extracted JS function.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
2823
580ce007 2824 def _cached(self, func, *cache_id):
2825 def inner(*args, **kwargs):
2826 if cache_id not in self._player_cache:
2827 try:
2828 self._player_cache[cache_id] = func(*args, **kwargs)
2829 except ExtractorError as e:
2830 self._player_cache[cache_id] = e
2831 except Exception as e:
2832 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2833
2834 ret = self._player_cache[cache_id]
2835 if isinstance(ret, Exception):
2836 raise ret
2837 return ret
2838 return inner
2839
545cc85d 2840 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2841 """Turn the encrypted s field into a working signature"""
580ce007 2842 extract_sig = self._cached(
2843 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2844 func = extract_sig(video_id, player_url, s)
2845 self._print_sig_code(func, s)
2846 return func(s)
404f611f 2847
2848 def _decrypt_nsig(self, s, video_id, player_url):
2849 """Turn the encrypted n field into a working signature"""
2850 if player_url is None:
2851 raise ExtractorError('Cannot decrypt nsig without player_url')
60f393e4 2852 player_url = urljoin('https://www.youtube.com', player_url)
404f611f 2853
b505e851 2854 try:
2855 jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
2856 except ExtractorError as e:
2857 raise ExtractorError('Unable to extract nsig function code', cause=e)
580ce007 2858 if self.get_param('youtube_print_sig_code'):
2859 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
404f611f 2860
25836db6 2861 try:
2862 extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
2863 ret = extract_nsig(jsi, func_code)(s)
2864 except JSInterpreter.Exception as e:
2865 try:
992dc6b4 2866 jsi = PhantomJSwrapper(self, timeout=5000)
25836db6 2867 except ExtractorError:
2868 raise e
2869 self.report_warning(
2870 f'Native nsig extraction failed: Trying with PhantomJS\n'
2871 f' n = {s} ; player = {player_url}', video_id)
0468a3b3 2872 self.write_debug(e, only_once=True)
25836db6 2873
2874 args, func_body = func_code
2875 ret = jsi.execute(
2876 f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
2877 video_id=video_id, note='Executing signature code').strip()
580ce007 2878
2879 self.write_debug(f'Decrypted nsig {s} => {ret}')
2880 return ret
2881
90a1df30 2882 def _extract_n_function_name(self, jscode):
2883 funcname, idx = self._search_regex(
2884 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2885 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2886 if not idx:
2887 return funcname
2888
2889 return json.loads(js_to_json(self._search_regex(
2890 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2891 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2892
def _extract_n_function_code(self, video_id, player_url):
    """Return (jsi, player_id, func_code) for the player's n-parameter function.

    `func_code` comes from the 'youtube-nsig' cache when present there;
    otherwise it is extracted from the downloaded player code (by regex
    first, by the JS interpreter as fallback) and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which
    # would otherwise act as a regex anchor and prevent any match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2918
2919 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2920 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2921
580ce007 2922 def extract_nsig(s):
25836db6 2923 try:
2924 ret = func([s])
2925 except JSInterpreter.Exception:
2926 raise
2927 except Exception as e:
2928 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2929
f6ca640b 2930 if ret.startswith('enhanced_except_'):
25836db6 2931 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2932 return ret
580ce007 2933
2934 return extract_nsig
e0df6211 2935
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when available, otherwise it is
    searched for in the player code. Without a player_url this raises
    ExtractorError if `fatal`, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if player_code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2959
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs found in the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # The enumerate index doubles as the "fully watched" flag (0 then 1)
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = 0
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3000
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by `webpage`.

    An Invidious-style alternate link short-circuits everything else;
    otherwise the parent class's results come first, followed by lazyYT
    embeds and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for id_ in lazyyt_ids:
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The last group is the video id; the first two are the quote characters
        importer_id = groups[-1]
        yield cls.url_result(importer_id, cls, importer_id)
66c9fa36 3024
@classmethod
def extract_id(cls, url):
    """Return the video id that get_temp_id() finds in `url`.

    Raises ExtractorError when no id is found.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3031
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`.

    Start times are read from 'timeRangeStartMillis' (scaled by 1000) and
    titles from the chapter's 'simpleText' title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_time(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=chapter_time,
        chapter_title=chapter_title,
        duration=duration)
3046
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Each panel's marker list is tried in order; the first one that produces
    a non-empty chapter list wins. Returns an empty list if none does.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_seconds(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_text(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, start_seconds, title_text, duration)
        if chapters:
            return chapters
    return []
7c365c21 3059
def _extract_chapters_from_description(self, description, duration):
    """Build chapters from timestamp lines in the video description.

    Lines of the form "<time> <title>" are tried first; if they give no
    chapters, lines of the form "<title> <time>" are tried. Both passes are
    non-strict, i.e. the entries are sorted by start time before validation.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda pair: parse_duration(pair[1]),
        chapter_title=lambda pair: pair[0],
        duration=duration, strict=False)
84213ea8 3070
1890fc63 3071 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3072 if not duration:
3073 return
3074 chapter_list = [{
3075 'start_time': chapter_time(chapter),
3076 'title': chapter_title(chapter),
3077 } for chapter in chapter_list or []]
3078 if not strict:
3079 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3080
a3976e07 3081 chapters = [{'start_time': 0}]
1890fc63 3082 for idx, chapter in enumerate(chapter_list):
a3976e07 3083 if chapter['start_time'] is None:
1890fc63 3084 self.report_warning(f'Incomplete chapter {idx}')
3085 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 3086 chapters.append(chapter)
709ee214 3087 elif chapter not in chapters:
3088 self.report_warning(
3089 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
a3976e07 3090 return chapters[1:]
84213ea8 3091
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer carries no 'commentId'. `parent` is the id
    of the parent comment; top-level comments are given the parent 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    def probe(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    body = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    published_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    estimated_timestamp = self._parse_time_text(published_text)

    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = probe(lambda r: r['authorEndpoint']['browseEndpoint']['browseId'], str)

    raw_votes = probe((lambda r: r['voteCount']['simpleText'], lambda r: r['likeCount']), str)
    like_count = parse_count(raw_votes) or 0
    avatar_url = probe(lambda r: r['authorThumbnail']['thumbnails'][-1]['url'], str)

    by_uploader = probe(lambda r: r['authorIsChannelOwner'], bool)
    action_buttons = probe(lambda r: r['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': estimated_timestamp,
        'time_text': published_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }
3128
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or reply thread.

    @param root_continuation_data  Renderer from which the first continuation is extracted
    @param ytcfg                   ytcfg used to build the API request headers
    @param video_id                Id of the video (used for the forced continuation and warnings)
    @param parent                  Id of the parent comment when extracting a reply thread,
                                   None for the top-level comment section
    @param tracker                 Dict of counters shared across the recursive calls

    Raises `self.CommentsDisabled` when a top-level extraction yields no comments
    and the root data carries a message.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the estimated total and return the continuation for the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, recursing into reply threads.

        A bare `yield` (None) signals the consumer that the parent limit was reached.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize, i.e. effectively unlimited
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # Stop this reply thread here. Falling through would re-process the
                # previous page's (stale) response, yielding its comments again and
                # re-requesting the very same continuation.
                return
            raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread signalled that the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3285
3286 @staticmethod
3287 def _generate_comment_continuation(video_id):
3288 """
3289 Generates initial comment section continuation token from given video id
3290 """
3291 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3292 return base64.b64encode(token.encode()).decode()
3293
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    found in `contents`, capped at the first value of the `max_comments`
    extractor argument (no cap when it is not a number).
    """
    def _real_comment_extract(contents):
        section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section = item
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    # The limit is read eagerly; the comments themselves are fetched lazily
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), limit)
a1c5d2ca 3304
109dd3b2 3305 @staticmethod
99e9e001 3306 def _get_checkok_params():
3307 return {'contentCheckOk': True, 'racyCheckOk': True}
3308
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    `sts` is included as 'signatureTimestamp' only when it is not None.
    The check-ok flags from `_get_checkok_params` are merged into the result.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    result = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    result.update(cls._get_checkok_params())
    return result
3322
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates that the video is age-gated.

    A response counts as age-gated when it carries a
    'desktopLegacyAgeGateReason', or when its playability status/reason
    contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are spelled in uppercase
    # (e.g. AGE_VERIFICATION_REQUIRED), so compare case-insensitively.
    # Non-string values cannot contain a marker and are ignored.
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
3334
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3338
50ac0e54 3339 _STORY_PLAYER_PARAMS = '8AEB'
3340
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API JSON for `video_id` using the given client.

    The signature timestamp is only looked up when a `player_url` is given.
    Returns the response, or None if the response is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    wants_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if wants_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3362
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the `player_client` extractor argument: names of
    allowed clients (those not starting with '_'), 'default' for the default
    clients, or 'all' for every allowed client in descending priority.
    Unknown names are skipped with a warning, and the defaults are used when
    nothing valid was requested. For music URLs, the `_music` variant of each
    requested client is appended when such a client exists.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialize the music variants before extending, so that the list
        # is never mutated while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3386
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each of the given clients.

    @param clients        Ordered client names to query; fallback clients may be
                          added while iterating (see below)
    @param video_id       Id of the video
    @param webpage        Watch page HTML, or a falsy value when it was not downloaded
    @param master_ytcfg   ytcfg used for the web client and as fallback for others
    @param smuggled_data  Smuggled data, passed through to `_extract_player_response`
    @returns              Tuple (list of player responses, player_url)

    Errors of individual clients are reported as warnings; the last error is
    only raised when no player response at all could be collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below takes the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        # Shallow copy, so that blanking streamingData does not touch initial_pr itself
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for all following clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3468
4d37720a
L
3469 def _needs_live_processing(self, live_status, duration):
3470 if (live_status == 'is_live' and self.get_param('live_from_start')
3471 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3472 return live_status
3473
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generator yielding format dicts, followed by a single subtitles dict.

    @param streaming_data  Iterable of streamingData dicts (one per player response)
    @param video_id        Id of the video (used for warnings and signature decryption)
    @param player_url      URL of the JS player; needed to decrypt signatures and nsig
    @param live_status     Live status string of the video
    @param duration        Duration of the video, used to detect damaged formats

    First the direct ("https") formats of all responses are yielded, then the
    formats of the HLS and DASH manifests that are not skipped. The very last
    item yielded is the merged subtitles dict, not a format.
    """
    # itags maps a format id to the protocol it was first seen with;
    # stream_ids holds the "<itag>.<audio track id>" keys already emitted
    itags, stream_ids = {}, []
    # Qualities seen per itag and per height; used to rate manifest formats later
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher, which needs the player
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, but marked and deprioritized as throttled
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for descriptive tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            # NOTE(review): `quality` may still be None here if the format has neither
            # 'quality' nor 'audioQuality'; `.replace` would then fail - confirm
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Group 1 is the bare mime type, group 2 the optional codecs string
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    # Manifest types ('hls'/'dash') that must not be downloaded
    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to a manifest format `f` (in place).

        Returns False if the itag was already seen with this protocol, in
        which case the format is to be dropped; True otherwise.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag seen via another protocol: disambiguate the format id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality recorded for the nearest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The final item is the subtitles dict; callers must split it off from the formats
    yield subtitles
11f9be09 3677
def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one storyboard ("mhtml") format per storyboard level.

    @param player_responses  Player responses to take the storyboard spec from
    @param duration          Duration of the video; needed to compute the fps
                             and the duration of each fragment

    Nothing is yielded when there is no usable storyboard spec or when the
    duration is unknown. Malformed levels are skipped with a warning.
    """
    # The spec is '|'-separated: the base URL comes first, followed by one entry per level.
    # It is reversed so that the base URL can be popped off the end.
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Both the fps and the fragment durations divide by the duration;
        # without it they cannot be computed (None or 0 would raise below)
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3715
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns  Tuple (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when 'webpage' is listed in the `player_skip`
              extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3732
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status of the video and extract its formats.

    Returns the tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')

    # The checks are ordered by precedence; the first one that applies wins
    if get_first(video_details, 'isPostLiveDvr'):
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif get_first(video_details, 'isUpcoming'):
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        # Neither flag was reported, so the status is unknown
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The last item produced is the subtitles dict; everything before it is a format
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
3751
def _real_extract(self, url):
    """Extract the info dict of a single video.

    Downloads the player responses (and the watch page unless skipped) and
    builds the result from them: title/description, formats, storyboards,
    thumbnails, subtitles and automatic captions, music metadata parsed
    from auto-generated descriptions, and engagement/channel metadata from
    the "initial data" of the watch page (or the 'next' API endpoint).

    Returns a url result for the trailer when only a trailer is playable,
    a playlist result for multifeed videos (unless --no-playlist or
    'force_singlefeed' was smuggled), and the video info dict otherwise.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(
        url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Meta tags can only be searched when the webpage was downloaded
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    # force_singlefeed stops the feed itself from expanding into a playlist again
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_id),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing even though a subreason was given
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Names earlier in the list get a higher preference; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Only parse the URL once it is known to exist
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times given in the URL; the fragment takes precedence over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: The pattern is verbose (?x), so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        # Not available from the webpage, so request it from the API instead
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, 'toggleButtonRenderer',
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
                    default=[]))
            for tbr in tbrs:
                # The counts are parsed out of the accessibility labels of the buttons
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # Album/Artist/Song rows are only used when no row has a divider line
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 4264
a61fd4cf 4265
a6213a49 4266class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that carries the smuggled data of the input URL over to the result.

    The decorated function is called as func(self, url, smuggled_data) with
    the unsmuggled URL. If any smuggled data is left afterwards, it is
    attached to the returned url/url_transparent result and, lazily, to
    each of its entries.
    """
    def _smuggle(info, smuggled_data):
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                    # The flag is replaced by pointing the URL at the music host
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Each entry gets its own copy since _smuggle may remove keys from it
            info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
        return info_dict

    return wrapper
4293
def _extract_channel_id(self, webpage):
    """Return the channel ID found in the meta tags of the webpage.

    The 'channelId' meta tag is used when present. Otherwise the ID is
    taken from the /channel/<id> path of one of the URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group; a repeated group only
    # keeps its last repetition, i.e. the final character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4306
8bdd16b4 4307 @staticmethod
cd7c66cf 4308 def _extract_basic_item_renderer(item):
4309 # Modified from _extract_grid_item_renderer
201c1459 4310 known_basic_renderers = (
a17526e4 4311 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4312 )
4313 for key, renderer in item.items():
201c1459 4314 if not isinstance(renderer, dict):
cd7c66cf 4315 continue
201c1459 4316 elif key in known_basic_renderers:
4317 return renderer
4318 elif key.startswith('grid') and key.endswith('Renderer'):
4319 return renderer
8bdd16b4 4320
def _grid_entries(self, grid_renderer):
    """Yield at most one result for every item of the grid renderer.

    Depending on what the item's renderer carries, this is a playlist url
    result, an extracted video, a channel url result, or a url result for
    the item's navigation endpoint. Unrecognised items are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id, video_id, channel_id = map(renderer.get, ('playlistId', 'videoId', 'channelId'))
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 4360
def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com url result for the renderer, or None.

    The video of the playlist item is preferred, then the playlist of the
    watch endpoint, and finally the browse endpoint.
    """
    def lookup(*path):
        return traverse_obj(renderer, path)

    item_video_id = lookup('playlistItemData', 'videoId')
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = lookup('navigationEndpoint', 'watchEndpoint', 'playlistId')
    if playlist_id:
        watch_video_id = lookup('navigationEndpoint', 'watchEndpoint', 'videoId')
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = lookup('navigationEndpoint', 'browseEndpoint', 'browseId')
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4378
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in the content of a shelf renderer.

    Only 'gridRenderer' and 'expandedShelfContentsRenderer' content is
    processed; anything else yields nothing.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
        if grid_like:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            yield from self._grid_entries(grid_like)
        # TODO: 'horizontalListRenderer' content is not extracted yet
8bdd16b4 4393
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's own URL followed by its embedded entries.

    With skip_channels set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4409
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist video renderer that has a videoId"""
    for content in video_list_renderer['contents']:
        if isinstance(content, dict):
            renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
            # Renderers without a video ID cannot be extracted
            if isinstance(renderer, dict) and renderer.get('videoId'):
                yield self._extract_video(renderer)
07aeced6 4421
def _rich_entries(self, rich_grid_renderer):
    """Yield the video of a rich grid item if it has a video or reel renderer with a videoId"""
    renderer_path = ('content', ('videoRenderer', 'reelItemRenderer'))
    renderer = traverse_obj(rich_grid_renderer, renderer_path, get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4429
8bdd16b4 4430 def _video_entry(self, video_renderer):
4431 video_id = video_renderer.get('videoId')
4432 if video_id:
4433 return self._extract_video(video_renderer)
dacb3a86 4434
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """
    Return a YoutubeTabIE URL result for the page a hashtag tile points to,
    titled with the tile's 'hashtag' text, or None if the tile has no usable URL.
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4441
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a backstage post, in this order:
    the attached video, the attached playlist, then every video URL linked
    from the post text (except a link to the attached video itself).
    Nothing is yielded when post.backstagePostRenderer is missing.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been handled above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 4477
8bdd16b4 4478 def _post_thread_continuation_entries(self, post_thread_continuation):
4479 contents = post_thread_continuation.get('contents')
4480 if not isinstance(contents, list):
4481 return
4482 for content in contents:
4483 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4484 if isinstance(renderer, dict):
4485 yield from self._post_thread_entries(renderer)
8bdd16b4 4486 continue
6b0b0a28 4487 renderer = content.get('videoRenderer')
4488 if isinstance(renderer, dict):
4489 yield self._video_entry(renderer)
07aeced6 4490
39ed931e 4491 r''' # unused
4492 def _rich_grid_entries(self, contents):
4493 for content in contents:
4494 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4495 if video_renderer:
4496 entry = self._video_entry(video_renderer)
4497 if entry:
4498 yield entry
4499 '''
52efa4b3 4500
def _report_history_entries(self, renderer):
    """
    Yield a YoutubeIE URL result for every URL found in the text cells of
    a report history table renderer.
    """
    urls = traverse_obj(renderer, (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    # Guard against a None result (e.g. renderer is None), as done for
    # other traverse_obj results in this file
    for url in urls or []:
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4507
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].
    @param parent_renderer: renderer dict whose 'contents' list is processed
    @param continuation_list: one-element list, modified in-place so that
                              continuation_list[0] holds the continuation
                              found while extracting (or None)
    """
    continuation_list[:] = [None]

    # Maps the key of an item inside a section/shelf to the callable
    # producing an iterable of entries from the item's renderer
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue

            # Only the first known renderer of each item is used
            for key, renderer in section_item.items():
                if key not in known_renderers:
                    continue
                yield from (entry for entry in known_renderers[key](renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)
                break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4560
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting and yielding
    continuation pages for as long as a continuation is found.
    @param tab: tab renderer dict; its 'content' is processed
    @param item_id: used to label the continuation requests
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so this always fills
        # the list most recently bound in this method
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # key of the first continuation item -> (entry extractor, key under
    # which the continuation items are wrapped before being passed to it)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            handler = known_renderers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer on this page: nothing more to extract
        if not video_items_renderer:
            break
9558dcec 4623
8bdd16b4 4624 @staticmethod
7c219ea6 4625 def _extract_selected_tab(tabs, fatal=True):
86973308
M
4626 for tab_renderer in tabs:
4627 if tab_renderer.get('selected'):
4628 return tab_renderer
4629 if fatal:
4630 raise ExtractorError('Unable to find selected tab')
4631
@staticmethod
def _extract_tab_renderers(response):
    """
    Collect the 'tabRenderer'/'expandableTabRenderer' dicts found under
    contents.twoColumnBrowseResultsRenderer.tabs of a response.
    """
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
b82f815f 4636
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab.
    The tab's 'title' and 'expandedText' (when present) are appended to the
    playlist title as ' - <text>'.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
39ed931e 4650
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Collect playlist/channel level metadata from a browse response.
    @param item_id: fallback 'id' (replaced by the channel's externalId when known)
    @param data: response
    @returns: info dict with id, uploader/channel fields, title, availability,
              channel_follower_count, description, tags, thumbnails,
              modified_date, view_count and playlist_count
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    title = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info.update({
        'title': title,
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            secondary_sidebar_renderer = self._extract_sidebar_info_renderer(
                data, 'playlistSidebarSecondaryInfoRenderer')
            owner = traverse_obj(
                secondary_sidebar_renderer, ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    info.update({
        'channel': info['uploader'],
        'channel_id': info['uploader_id'],
        'channel_url': info['uploader_url']
    })
    return info
73c4ac2c 4753
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a playlist shown inline on a watch page, requesting
    further pages from the 'next' endpoint until a page is missing, has no
    videos, or has no videos after the last one already yielded.
    @param playlist: playlist renderer dict of the first page
    @param playlist_id: id sent as 'playlistId' in the continuation queries
    @param data: response of the initial page
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A follow-up response may lack the playlist renderer entirely
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may not carry a watch endpoint; fall back to defaults below
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4784
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist embedded in a watch page: a URL result
    delegating to the playlist's own page when it has a different, viewable
    one, otherwise a playlist result built from the inline playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    must_extract_inline = not playlist_url or playlist_url == url or is_known_unviewable
    if must_extract_inline:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4807
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _privacy_matches(header_value, icon_value, fallback=None):
        # The header privacy takes priority over the dropdown icon; the
        # fallback is used only when neither of them is known
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # A badge must count on its own. Previously "badge or x if c else y"
    # parsed as "(badge or x) if c else y", which dropped the badge whenever
    # the header privacy was missing.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _privacy_matches('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _privacy_matches('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 4848
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty renderer stored under the key info_renderer
    in the items of sidebar.playlistSidebarRenderer, or None if there is none.
    @param expected_type: type the renderer must have to be considered
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4857
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)
    Returns None without making a request when data has neither playlist
    metadata nor a playlist header.
    """
    if not traverse_obj(
            data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer')):
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 4877
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTabIE 'skip' configuration argument"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4881
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on network
    errors (other than HTTP 403/429) and on incomplete initial data.
    @returns: (webpage, data); either may be None if extraction failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_403_or_429 = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_403_or_429:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4909
a25bca9f 4910 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4911 """Use if failed to extract ytcfg (and data) from initial webpage"""
4912 if not ytcfg and self.is_authenticated:
4913 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4914 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4915 raise ExtractorError(
4916 f'{msg}. If you are not downloading private content, or '
4917 'your cookies are only for the first account and channel,'
4918 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4919 expected=True)
4920 self.report_warning(msg, only_once=True)
4921
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for url, from the webpage unless skip_webpage is
    set, falling back to the API when the webpage yields no data.
    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4940
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the 'navigation/resolve_url' API and return the
    response of the endpoint it resolves to ('browse' or 'next').
    Raises ExtractorError if the URL cannot be resolved and fatal is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Endpoint key in the resolve response -> API endpoint to query with its params
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4958
# Default 'params' value sent with search queries; None sends no 'params'
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting further pages for as long as
    a continuation is found.
    @param query: search query string
    @param params: 'params' value of the search request;
                   defaults to self._SEARCH_PARAMS
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page_renderer, continuation_list)
        if not continuation_list[0]:
            break
4993
4994
4995class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4996 IE_DESC = 'YouTube Tabs'
4997 _VALID_URL = r'''(?x:
4998 https?://
4999 (?:\w+\.)?
5000 (?:
5001 youtube(?:kids)?\.com|
5002 %(invidious)s
5003 )/
5004 (?:
5005 (?P<channel_type>channel|c|user|browse)/|
5006 (?P<not_channel>
5007 feed/|hashtag/|
5008 (?:playlist|watch)\?.*?\blist=
5009 )|
5010 (?!(?:%(reserved_names)s)\b) # Direct URLs
5011 )
5012 (?P<id>[^/?\#&]+)
5013 )''' % {
5014 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5015 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5016 }
5017 IE_NAME = 'youtube:tab'
5018
5019 _TESTS = [{
5020 'note': 'playlists, multipage',
5021 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5022 'playlist_mincount': 94,
5023 'info_dict': {
5024 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5025 'title': 'Igor Kleiner - Playlists',
a6213a49 5026 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5027 'uploader': 'Igor Kleiner',
a6213a49 5028 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5029 'channel': 'Igor Kleiner',
5030 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5031 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5032 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5033 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5034 'channel_follower_count': int
a6213a49 5035 },
5036 }, {
5037 'note': 'playlists, multipage, different order',
5038 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5039 'playlist_mincount': 94,
5040 'info_dict': {
5041 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5042 'title': 'Igor Kleiner - Playlists',
a6213a49 5043 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5045 'uploader': 'Igor Kleiner',
5046 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5047 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5048 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5049 'channel': 'Igor Kleiner',
5050 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5051 'channel_follower_count': int
a6213a49 5052 },
5053 }, {
5054 'note': 'playlists, series',
5055 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5056 'playlist_mincount': 5,
5057 'info_dict': {
5058 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5059 'title': '3Blue1Brown - Playlists',
5060 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5061 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5062 'uploader': '3Blue1Brown',
976ae3ea 5063 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5064 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5065 'channel': '3Blue1Brown',
5066 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5067 'tags': ['Mathematics'],
6c73052c 5068 'channel_follower_count': int
a6213a49 5069 },
5070 }, {
5071 'note': 'playlists, singlepage',
5072 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5073 'playlist_mincount': 4,
5074 'info_dict': {
5075 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5076 'title': 'ThirstForScience - Playlists',
5077 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5078 'uploader': 'ThirstForScience',
5079 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5080 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5081 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5082 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5083 'tags': 'count:13',
5084 'channel': 'ThirstForScience',
6c73052c 5085 'channel_follower_count': int
a6213a49 5086 }
5087 }, {
5088 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5089 'only_matching': True,
5090 }, {
5091 'note': 'basic, single video playlist',
5092 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5093 'info_dict': {
5094 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5095 'uploader': 'Sergey M.',
5096 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5097 'title': 'youtube-dl public playlist',
976ae3ea 5098 'description': '',
5099 'tags': [],
5100 'view_count': int,
5101 'modified_date': '20201130',
5102 'channel': 'Sergey M.',
5103 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5104 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5105 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5106 'availability': 'public',
a6213a49 5107 },
5108 'playlist_count': 1,
5109 }, {
5110 'note': 'empty playlist',
5111 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5112 'info_dict': {
5113 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5114 'uploader': 'Sergey M.',
5115 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5116 'title': 'youtube-dl empty playlist',
976ae3ea 5117 'tags': [],
5118 'channel': 'Sergey M.',
5119 'description': '',
5120 'modified_date': '20160902',
5121 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5122 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5123 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5124 'availability': 'public',
a6213a49 5125 },
5126 'playlist_count': 0,
5127 }, {
5128 'note': 'Home tab',
5129 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5130 'info_dict': {
5131 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5132 'title': 'lex will - Home',
5133 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5134 'uploader': 'lex will',
5135 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5136 'channel': 'lex will',
5137 'tags': ['bible', 'history', 'prophesy'],
5138 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5139 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5140 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5141 'channel_follower_count': int
a6213a49 5142 },
5143 'playlist_mincount': 2,
5144 }, {
5145 'note': 'Videos tab',
5146 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5147 'info_dict': {
5148 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5149 'title': 'lex will - Videos',
5150 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5151 'uploader': 'lex will',
5152 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5153 'tags': ['bible', 'history', 'prophesy'],
5154 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5155 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5156 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5157 'channel': 'lex will',
6c73052c 5158 'channel_follower_count': int
a6213a49 5159 },
5160 'playlist_mincount': 975,
5161 }, {
5162 'note': 'Videos tab, sorted by popular',
5163 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5164 'info_dict': {
5165 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5166 'title': 'lex will - Videos',
5167 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5168 'uploader': 'lex will',
5169 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5170 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5171 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5172 'channel': 'lex will',
5173 'tags': ['bible', 'history', 'prophesy'],
5174 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5175 'channel_follower_count': int
a6213a49 5176 },
5177 'playlist_mincount': 199,
5178 }, {
5179 'note': 'Playlists tab',
5180 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5181 'info_dict': {
5182 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5183 'title': 'lex will - Playlists',
5184 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5185 'uploader': 'lex will',
5186 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5187 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5188 'channel': 'lex will',
5189 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5190 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5191 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5192 'channel_follower_count': int
a6213a49 5193 },
5194 'playlist_mincount': 17,
5195 }, {
5196 'note': 'Community tab',
5197 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5198 'info_dict': {
5199 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5200 'title': 'lex will - Community',
5201 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5202 'uploader': 'lex will',
5203 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5204 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5205 'channel': 'lex will',
5206 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5207 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5208 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5209 'channel_follower_count': int
a6213a49 5210 },
5211 'playlist_mincount': 18,
5212 }, {
5213 'note': 'Channels tab',
5214 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5215 'info_dict': {
5216 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5217 'title': 'lex will - Channels',
5218 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5219 'uploader': 'lex will',
5220 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5221 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5222 'channel': 'lex will',
5223 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5224 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5225 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5226 'channel_follower_count': int
a6213a49 5227 },
5228 'playlist_mincount': 12,
5229 }, {
5230 'note': 'Search tab',
5231 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5232 'playlist_mincount': 40,
5233 'info_dict': {
5234 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5235 'title': '3Blue1Brown - Search - linear algebra',
5236 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5237 'uploader': '3Blue1Brown',
5238 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5239 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5240 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5241 'tags': ['Mathematics'],
5242 'channel': '3Blue1Brown',
5243 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 5244 'channel_follower_count': int
a6213a49 5245 },
5246 }, {
5247 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5248 'only_matching': True,
5249 }, {
5250 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5251 'only_matching': True,
5252 }, {
5253 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5254 'only_matching': True,
5255 }, {
5256 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5257 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5258 'info_dict': {
5259 'title': '29C3: Not my department',
5260 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5261 'uploader': 'Christiaan008',
5262 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5263 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5264 'tags': [],
5265 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5266 'view_count': int,
5267 'modified_date': '20150605',
5268 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5269 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5270 'channel': 'Christiaan008',
c26f9b99 5271 'availability': 'public',
a6213a49 5272 },
5273 'playlist_count': 96,
5274 }, {
5275 'note': 'Large playlist',
5276 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5277 'info_dict': {
5278 'title': 'Uploads from Cauchemar',
5279 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5280 'uploader': 'Cauchemar',
5281 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5282 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5283 'tags': [],
5284 'modified_date': r're:\d{8}',
5285 'channel': 'Cauchemar',
5286 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5287 'view_count': int,
5288 'description': '',
5289 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5290 'availability': 'public',
a6213a49 5291 },
5292 'playlist_mincount': 1123,
976ae3ea 5293 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5294 }, {
5295 'note': 'even larger playlist, 8832 videos',
5296 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5297 'only_matching': True,
5298 }, {
5299 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5300 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5301 'info_dict': {
5302 'title': 'Uploads from Interstellar Movie',
5303 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5304 'uploader': 'Interstellar Movie',
5305 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5306 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5307 'tags': [],
5308 'view_count': int,
5309 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5310 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5311 'channel': 'Interstellar Movie',
5312 'description': '',
5313 'modified_date': r're:\d{8}',
c26f9b99 5314 'availability': 'public',
a6213a49 5315 },
5316 'playlist_mincount': 21,
5317 }, {
5318 'note': 'Playlist with "show unavailable videos" button',
5319 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5320 'info_dict': {
5321 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5322 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5323 'uploader': 'Phim Siêu Nhân Nhật Bản',
5324 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5325 'view_count': int,
5326 'channel': 'Phim Siêu Nhân Nhật Bản',
5327 'tags': [],
5328 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5329 'description': '',
5330 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5331 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5332 'modified_date': r're:\d{8}',
c26f9b99 5333 'availability': 'public',
a6213a49 5334 },
5335 'playlist_mincount': 200,
976ae3ea 5336 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5337 }, {
5338 'note': 'Playlist with unavailable videos in page 7',
5339 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5340 'info_dict': {
5341 'title': 'Uploads from BlankTV',
5342 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5343 'uploader': 'BlankTV',
5344 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5345 'channel': 'BlankTV',
5346 'channel_url': 'https://www.youtube.com/c/blanktv',
5347 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5348 'view_count': int,
5349 'tags': [],
5350 'uploader_url': 'https://www.youtube.com/c/blanktv',
5351 'modified_date': r're:\d{8}',
5352 'description': '',
c26f9b99 5353 'availability': 'public',
a6213a49 5354 },
5355 'playlist_mincount': 1000,
976ae3ea 5356 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5357 }, {
5358 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5359 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5360 'info_dict': {
5361 'title': 'Data Analysis with Dr Mike Pound',
5362 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5363 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5364 'uploader': 'Computerphile',
5365 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5366 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5367 'tags': [],
5368 'view_count': int,
5369 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5370 'channel_url': 'https://www.youtube.com/user/Computerphile',
5371 'channel': 'Computerphile',
c26f9b99 5372 'availability': 'public',
6141346d 5373 'modified_date': '20190712',
a6213a49 5374 },
5375 'playlist_mincount': 11,
5376 }, {
5377 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5378 'only_matching': True,
5379 }, {
5380 'note': 'Playlist URL that does not actually serve a playlist',
5381 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5382 'info_dict': {
5383 'id': 'FqZTN594JQw',
5384 'ext': 'webm',
5385 'title': "Smiley's People 01 detective, Adventure Series, Action",
5386 'uploader': 'STREEM',
5387 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5388 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5389 'upload_date': '20150526',
5390 'license': 'Standard YouTube License',
5391 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5392 'categories': ['People & Blogs'],
5393 'tags': list,
5394 'view_count': int,
5395 'like_count': int,
a6213a49 5396 },
5397 'params': {
5398 'skip_download': True,
5399 },
5400 'skip': 'This video is not available.',
5401 'add_ie': [YoutubeIE.ie_key()],
5402 }, {
5403 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5404 'only_matching': True,
5405 }, {
5406 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5407 'only_matching': True,
5408 }, {
5409 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5410 'info_dict': {
12a1b225 5411 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5412 'ext': 'mp4',
976ae3ea 5413 'title': str,
a6213a49 5414 'uploader': 'Sky News',
5415 'uploader_id': 'skynews',
5416 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5417 'upload_date': r're:\d{8}',
976ae3ea 5418 'description': str,
a6213a49 5419 'categories': ['News & Politics'],
5420 'tags': list,
5421 'like_count': int,
86973308 5422 'release_timestamp': int,
976ae3ea 5423 'channel': 'Sky News',
5424 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5425 'age_limit': 0,
5426 'view_count': int,
86973308 5427 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5428 'playable_in_embed': True,
86973308 5429 'release_date': r're:\d+',
976ae3ea 5430 'availability': 'public',
5431 'live_status': 'is_live',
5432 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5433 'channel_follower_count': int,
5434 'concurrent_view_count': int,
a6213a49 5435 },
5436 'params': {
5437 'skip_download': True,
5438 },
976ae3ea 5439 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5440 }, {
5441 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5442 'info_dict': {
5443 'id': 'a48o2S1cPoo',
5444 'ext': 'mp4',
5445 'title': 'The Young Turks - Live Main Show',
5446 'uploader': 'The Young Turks',
5447 'uploader_id': 'TheYoungTurks',
5448 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5449 'upload_date': '20150715',
5450 'license': 'Standard YouTube License',
5451 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5452 'categories': ['News & Politics'],
5453 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5454 'like_count': int,
a6213a49 5455 },
5456 'params': {
5457 'skip_download': True,
5458 },
5459 'only_matching': True,
5460 }, {
5461 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5462 'only_matching': True,
5463 }, {
5464 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5465 'only_matching': True,
5466 }, {
5467 'note': 'A channel that is not live. Should raise error',
5468 'url': 'https://www.youtube.com/user/numberphile/live',
5469 'only_matching': True,
5470 }, {
5471 'url': 'https://www.youtube.com/feed/trending',
5472 'only_matching': True,
5473 }, {
5474 'url': 'https://www.youtube.com/feed/library',
5475 'only_matching': True,
5476 }, {
5477 'url': 'https://www.youtube.com/feed/history',
5478 'only_matching': True,
5479 }, {
5480 'url': 'https://www.youtube.com/feed/subscriptions',
5481 'only_matching': True,
5482 }, {
5483 'url': 'https://www.youtube.com/feed/watch_later',
5484 'only_matching': True,
5485 }, {
5486 'note': 'Recommended - redirects to home page.',
5487 'url': 'https://www.youtube.com/feed/recommended',
5488 'only_matching': True,
5489 }, {
5490 'note': 'inline playlist with not always working continuations',
5491 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5492 'only_matching': True,
5493 }, {
5494 'url': 'https://www.youtube.com/course',
5495 'only_matching': True,
5496 }, {
5497 'url': 'https://www.youtube.com/zsecurity',
5498 'only_matching': True,
5499 }, {
5500 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5501 'only_matching': True,
5502 }, {
5503 'url': 'https://www.youtube.com/TheYoungTurks/live',
5504 'only_matching': True,
5505 }, {
5506 'url': 'https://www.youtube.com/hashtag/cctv9',
5507 'info_dict': {
5508 'id': 'cctv9',
5509 'title': '#cctv9',
976ae3ea 5510 'tags': [],
a6213a49 5511 },
4dc23a80 5512 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5513 }, {
5514 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5515 'only_matching': True,
5516 }, {
5517 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5518 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5519 'only_matching': True
5520 }, {
5521 'note': '/browse/ should redirect to /channel/',
5522 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5523 'only_matching': True
5524 }, {
5525 'note': 'VLPL, should redirect to playlist?list=PL...',
5526 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5527 'info_dict': {
5528 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5529 'uploader': 'NoCopyrightSounds',
5530 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5531 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5532 'title': 'NCS : All Releases 💿',
976ae3ea 5533 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5534 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5535 'modified_date': r're:\d{8}',
5536 'view_count': int,
5537 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5538 'tags': [],
5539 'channel': 'NoCopyrightSounds',
c26f9b99 5540 'availability': 'public',
a6213a49 5541 },
5542 'playlist_mincount': 166,
976ae3ea 5543 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5544 }, {
5545 'note': 'Topic, should redirect to playlist?list=UU...',
5546 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5547 'info_dict': {
5548 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5549 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5550 'title': 'Uploads from Royalty Free Music - Topic',
5551 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5552 'tags': [],
5553 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5554 'channel': 'Royalty Free Music - Topic',
5555 'view_count': int,
5556 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5557 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5558 'modified_date': r're:\d{8}',
5559 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5560 'description': '',
c26f9b99 5561 'availability': 'public',
a6213a49 5562 },
a6213a49 5563 'playlist_mincount': 101,
5564 }, {
86973308
M
5565 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5566 # Treat as a general feed
a6213a49 5567 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5568 'info_dict': {
5569 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5570 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5571 'tags': [],
a6213a49 5572 },
a6213a49 5573 'playlist_mincount': 9,
5574 }, {
5575 'note': 'Youtube music Album',
5576 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5577 'info_dict': {
5578 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5579 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5580 'tags': [],
5581 'view_count': int,
5582 'description': '',
5583 'availability': 'unlisted',
5584 'modified_date': r're:\d{8}',
a6213a49 5585 },
5586 'playlist_count': 50,
5587 }, {
5588 'note': 'unlisted single video playlist',
5589 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5590 'info_dict': {
5591 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5592 'uploader': 'colethedj',
5593 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5594 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5595 'availability': 'unlisted',
5596 'tags': [],
12a1b225 5597 'modified_date': '20220418',
976ae3ea 5598 'channel': 'colethedj',
5599 'view_count': int,
5600 'description': '',
5601 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5602 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5603 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5604 },
5605 'playlist_count': 1,
5606 }, {
5607 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5608 'url': 'https://www.youtube.com/feed/recommended',
5609 'info_dict': {
5610 'id': 'recommended',
5611 'title': 'recommended',
6c73052c 5612 'tags': [],
a6213a49 5613 },
5614 'playlist_mincount': 50,
5615 'params': {
5616 'skip_download': True,
5617 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5618 },
5619 }, {
5620 'note': 'API Fallback: /videos tab, sorted by oldest first',
5621 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5622 'info_dict': {
5623 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5624 'title': 'Cody\'sLab - Videos',
5625 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5626 'uploader': 'Cody\'sLab',
5627 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5628 'channel': 'Cody\'sLab',
5629 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5630 'tags': [],
5631 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5632 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5633 'channel_follower_count': int
a6213a49 5634 },
5635 'playlist_mincount': 650,
5636 'params': {
5637 'skip_download': True,
5638 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5639 },
86973308 5640 'skip': 'Query for sorting no longer works',
a6213a49 5641 }, {
5642 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5643 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5644 'info_dict': {
5645 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5646 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5647 'title': 'Uploads from Royalty Free Music - Topic',
5648 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5649 'modified_date': r're:\d{8}',
5650 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5651 'description': '',
5652 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5653 'tags': [],
5654 'channel': 'Royalty Free Music - Topic',
5655 'view_count': int,
5656 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
c26f9b99 5657 'availability': 'public',
a6213a49 5658 },
a6213a49 5659 'playlist_mincount': 101,
5660 'params': {
5661 'skip_download': True,
5662 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5663 },
7c219ea6 5664 }, {
5665 'note': 'non-standard redirect to regional channel',
5666 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5667 'only_matching': True
61d3665d 5668 }, {
5669 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5670 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5671 'info_dict': {
5672 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5673 'modified_date': '20220407',
5674 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5675 'tags': [],
5676 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5677 'uploader': 'pukkandan',
5678 'availability': 'unlisted',
5679 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5680 'channel': 'pukkandan',
5681 'description': 'Test for collaborative playlist',
5682 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5683 'view_count': int,
61d3665d 5684 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5685 },
5686 'playlist_mincount': 2
c26f9b99 5687 }, {
5688 'note': 'translated tab name',
5689 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5690 'info_dict': {
5691 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5692 'tags': [],
5693 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5694 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 5695 'description': 'test description',
c26f9b99 5696 'title': 'cole-dlp-test-acc - 再生リスト',
5697 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5698 'uploader': 'cole-dlp-test-acc',
5699 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5700 'channel': 'cole-dlp-test-acc',
6141346d 5701 'channel_follower_count': int,
c26f9b99 5702 },
5703 'playlist_mincount': 1,
5704 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5705 'expected_warnings': ['Preferring "ja"'],
5706 }, {
5707 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5708 'note': 'preferred lang set with playlist with translated video titles',
5709 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5710 'info_dict': {
5711 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5712 'tags': [],
5713 'view_count': int,
5714 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5715 'uploader': 'cole-dlp-test-acc',
5716 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5717 'channel': 'cole-dlp-test-acc',
5718 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5719 'description': 'test',
5720 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5721 'title': 'dlp test playlist',
5722 'availability': 'public',
5723 },
5724 'playlist_mincount': 1,
5725 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5726 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 5727 }, {
5728 # shorts audio pivot for 2GtVksBMYFM.
5729 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5730 'info_dict': {
5731 'id': 'sfv_audio_pivot',
5732 'title': 'sfv_audio_pivot',
5733 'tags': [],
5734 },
5735 'playlist_mincount': 50,
5736
86973308
M
5737 }, {
5738 # Channel with a real live tab (not to be mistaken with streams tab)
5739 # Do not treat like it should redirect to live stream
5740 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5741 'info_dict': {
5742 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5743 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5744 'tags': [],
5745 },
5746 'playlist_mincount': 20,
5747 }, {
5748 # Tab name is not the same as tab id
5749 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5750 'info_dict': {
5751 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5752 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5753 'tags': [],
5754 },
5755 'playlist_mincount': 8,
5756 }, {
5757 # Home tab id is literally home. Not to get mistaken with featured
5758 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5759 'info_dict': {
5760 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5761 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5762 'tags': [],
5763 },
5764 'playlist_mincount': 8,
5765 }, {
5766 # Should get three playlists for videos, shorts and streams tabs
5767 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5768 'info_dict': {
5769 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 5770 'title': 'Polka Ch. 尾丸ポルカ',
5771 'channel_follower_count': int,
5772 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5773 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5774 'uploader': 'Polka Ch. 尾丸ポルカ',
5775 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5776 'channel': 'Polka Ch. 尾丸ポルカ',
5777 'tags': 'count:35',
5778 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5779 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
86973308
M
5780 },
5781 'playlist_count': 3,
5782 }, {
5783 # Shorts tab with channel with handle
5784 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5785 'info_dict': {
5786 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5787 'title': 'Not Just Bikes - Shorts',
5788 'tags': 'count:12',
5789 'uploader': 'Not Just Bikes',
5790 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5791 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5792 'channel_follower_count': int,
5793 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5794 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5795 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5796 'channel': 'Not Just Bikes',
5797 },
5798 'playlist_mincount': 10,
5799 }, {
5800 # Streams tab
5801 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5802 'info_dict': {
5803 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5804 'title': '中村悠一 - Live',
5805 'tags': 'count:7',
5806 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5807 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5808 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5809 'channel': '中村悠一',
5810 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5811 'channel_follower_count': int,
5812 'uploader': '中村悠一',
5813 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5814 },
5815 'playlist_mincount': 60,
5816 }, {
5817 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5818 # See test_youtube_lists
5819 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5820 'only_matching': True,
5821 }, {
5822 # No uploads and no UCID given. Should fail with no uploads error
5823 # See test_youtube_lists
5824 'url': 'https://www.youtube.com/news',
5825 'only_matching': True
5826 }, {
5827 # No videos tab but has a shorts tab
5828 'url': 'https://www.youtube.com/c/TKFShorts',
5829 'info_dict': {
5830 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5831 'title': 'Shorts Break - Shorts',
5832 'tags': 'count:32',
5833 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5834 'channel': 'Shorts Break',
5835 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5836 'uploader': 'Shorts Break',
5837 'channel_follower_count': int,
5838 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5839 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5840 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5841 },
5842 'playlist_mincount': 30,
5843 }, {
5844 # Trending Now Tab. tab id is empty
5845 'url': 'https://www.youtube.com/feed/trending',
5846 'info_dict': {
5847 'id': 'trending',
5848 'title': 'trending - Now',
5849 'tags': [],
5850 },
5851 'playlist_mincount': 30,
5852 }, {
5853 # Trending Gaming Tab. tab id is empty
5854 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5855 'info_dict': {
5856 'id': 'trending',
5857 'title': 'trending - Gaming',
5858 'tags': [],
5859 },
5860 'playlist_mincount': 30,
4dc23a80
M
5861 }, {
5862 # Shorts url result in shorts tab
5863 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
5864 'info_dict': {
5865 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5866 'title': 'cole-dlp-test-acc - Shorts',
5867 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5868 'channel': 'cole-dlp-test-acc',
5869 'channel_follower_count': int,
5870 'description': 'test description',
5871 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5872 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5873 'tags': [],
5874 'uploader': 'cole-dlp-test-acc',
5875 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5876
5877 },
5878 'playlist': [{
5879 'info_dict': {
5880 '_type': 'url',
5881 'ie_key': 'Youtube',
5882 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
5883 'id': 'sSM9J5YH_60',
5884 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5885 'title': 'SHORT short',
5886 'channel': 'cole-dlp-test-acc',
5887 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5888 'view_count': int,
5889 'thumbnails': list,
5890 }
5891 }],
5892 'params': {'extract_flat': True},
5893 }, {
5894 # Live video status should be extracted
5895 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
5896 'info_dict': {
5897 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5898 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
5899 'tags': []
5900 },
5901 'playlist': [{
5902 'info_dict': {
5903 '_type': 'url',
5904 'ie_key': 'Youtube',
5905 'url': 'startswith:https://www.youtube.com/watch?v=',
5906 'id': str,
5907 'title': str,
5908 'live_status': 'is_live',
5909 'channel_id': str,
5910 'channel_url': str,
5911 'concurrent_view_count': int,
5912 'channel': str,
5913 }
5914 }],
5915 'params': {'extract_flat': True},
5916 'playlist_mincount': 1
a6213a49 5917 }]
5918
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    Any URL that `YoutubeIE.suitable` accepts is rejected here; every other
    URL is judged by the parent class' `suitable`.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5922
86973308
M
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single path segment (e.g. '/videos'); it is only tried
#          when the `not_channel` group did not take part in the match
#   post - everything that remains up to the end of the string
# NOTE(review): `not_channel` is presumably a named group defined inside _VALID_URL - confirm
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
5924
5925 def _get_url_mobj(self, url):
5926 mobj = self._URL_RE.match(url).groupdict()
5927 mobj.update((k, '') for k, v in mobj.items() if v is None)
5928 return mobj
5929
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a `(tab_id, tab_name)` tuple for a tab renderer dict.

    The id is taken from the `tab` group of the tab's endpoint URL (joined
    onto `base_url`), then from `tabIdentifier`, and finally falls back to
    the lower-cased tab title. `tab_name` is always the lower-cased title.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # Drop the leading '/' of the matched path segment
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
5951
5952 def _has_tab(self, tabs, tab_id):
5953 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 5954
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or feed page.

    Flow, in order:
      1. Force the host to www.youtube.com and split the URL with `_get_url_mobj`.
      2. For channel URLs smuggled as music URLs, hand off to the equivalent
         music.youtube.com playlist/channel URL.
      3. Decide between the video and the playlist when both ids are present.
      4. Download the page data and follow a conditional redirect if one is given.
      5. For channels, compare the selected tab with the requested one, queue
         extra tabs (streams/shorts) or fall back to the `UU` uploads playlist.
      6. Return the tab result(s), an inline playlist, or a single video.

    Raises ExtractorError (or UserNotLive for an unresolved /live tab) when
    nothing usable is found.
    """
    item_id = self._match_id(url)
    # Rewrite the host so every later request goes to www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading '/', the tab id does not
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab requested: start from the /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs that are extracted after this one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tab renderers are read again
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs were extracted: wrap them in one outer playlist
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, preferring the id reported by the page over the one from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6104
c5e8d7af 6105
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and `...list=<id>` URLs and forward them to YoutubeTabIE.

    No data is downloaded here; `_real_extract` only builds the canonical
    `https://www.youtube.com/playlist` URL and returns a `url_result` for it.
    """
    IE_DESC = 'YouTube playlists'
    # The whole host/path prefix is optional, so a bare playlist id matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        URLs that YoutubeTabIE accepts, and URLs carrying a non-empty `v`
        query parameter, are rejected; everything else is judged by the
        parent class' `suitable`.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is already imported at module level; no local import needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Build the canonical playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated before `url` is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; otherwise use just the id
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6218
6219
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a `list=` playlist id.

    The link is rewritten to a www.youtube.com/watch URL with both ids and
    handed to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link into a full watch URL and delegate it."""
        match = self._match_valid_url(url)
        video_id = match.group('id')
        playlist_id = match.group('playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6269
6270
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=<id>` URLs onto the channel's /live URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live page."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6284
6285
class YoutubeYtUserIE(InfoExtractor):
    """Expand the `ytuser:<name>` shorthand into the user's page URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name> URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6298
b05654f0 6299
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword family to the `LL` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6317
6318
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu (`:ytnotif` keyword) as a playlist of URL entries."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` seen, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn one notification renderer into a `url` entry dict.

        Returns None when the notification points to neither a video nor a
        channel post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when `approximate_date` is configured for YoutubeTabIE
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every page of the notification menu."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
6408
6409
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the `ytsearch` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 6423
a61fd4cf 6424
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the `ytsearchdate` key."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 6438
c9ae7b95 6439
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle www.youtube.com `/results` and `/search` URLs.

    The query comes from `search_query` (or `q`) and the optional `sp`
    parameter is passed through to `_search_results`.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        qs = parse_qs(url)
        # _VALID_URL requires one of the two parameters to be present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 6479
6480
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com `/search` URLs.

    A result section can be chosen with the `sp` query parameter or, when
    `sp` is absent, by naming it in the URL fragment (e.g. `#songs`).
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the results as a playlist titled `<query>` or `<query> - <section>`."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Use the known section name for this `sp` value, else the raw value itself
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # Text between the first and second '#', or '' when there is no '#'
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
16aa9ea4 6532
6533
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        """Extractor name derived from `_FEED_NAME`."""
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        """Run YoutubeBaseInfoExtractor's login-required check on this instance."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `/feed/<_FEED_NAME>` URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6552
6553
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6566
6567
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `recommended`; also matches the bare youtube.com home URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 6583
1ed5b5c9 6584
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `subscriptions` (`:ytsubs` keyword family)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 6596
1ed5b5c9 6597
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `history` (`:ythis` / `:ythistory` keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
6606
6607
class YoutubeStoriesIE(InfoExtractor):
    """Expand `ytstories:UC<id>` into the matching `RLTD<id>` playlist URL."""
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling `is_story` along with the playlist URL."""
        # The matched id excludes the 'UC' prefix, so this swaps 'UC' for 'RLTD'
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6622
6623
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map `/source/<video id>/shorts` URLs onto the `sfv_audio_pivot` feed."""
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id
        """
        encoded_id = video_id.encode()
        # The byte template embeds the video id three times
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated `bp` browse parameter."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6646
6647
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single non-video
    # query parameter (or after "watch?"), which is what remains when an
    # unquoted "&" makes the shell cut the URL. Extraction always fails with
    # a hint on how to quote the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining that the URL was truncated."""
        # The example commands name this project's executable (yt-dlp), not
        # the upstream youtube-dl, so the hint can be followed as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6695
6696
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its base video plus the clip's start/end offsets.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Return a url_transparent result for the clip's base video.

        The result points at the regular watch URL (handled by YoutubeIE) and
        carries the clip id plus section_start/section_end in seconds.

        Raises ExtractorError if the page data contains no video ID or no
        clip timing information.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First "loopCommand" found along this path; it holds the clip boundaries
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard a missing path surfaces as an opaque
            # "'NoneType' object is not subscriptable" TypeError below
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Timestamps are provided in milliseconds; convert to seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6753
6754
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs that end in a video ID of only 1-10 characters and
    # reports them as truncated instead of attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and URL."""
        short_id = self._match_id(url)
        message = f'Incomplete YouTube ID {short_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)