]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/crunchyroll] Add intro chapter (#6023)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
fe93e2c4 5import datetime
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
46383212 14import sys
f8271158 15import threading
8a784c74 16import time
e0df6211 17import traceback
14f25df2 18import urllib.error
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
4bb4a188 25from ..utils import (
f8271158 26 NO_DEFAULT,
27 ExtractorError,
4d37720a 28 LazyList,
693f0600 29 UserNotLive,
720c3099 30 bug_reports_message,
82d02080 31 classproperty,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
7a32c70d 35 filter_dict,
2d30521a 36 float_or_none,
11f9be09 37 format_field,
ff91cf74 38 get_first,
dd27fd17 39 int_or_none,
641ad5d8 40 is_html,
34921b43 41 join_nonempty,
48416bc4 42 js_to_json,
94278f72 43 mimetype2ext,
9c0d7f49 44 network_exceptions,
11f9be09 45 orderedSet,
6310acf5 46 parse_codecs,
49bd8c66 47 parse_count,
7c80519c 48 parse_duration,
7ea65411 49 parse_iso8601,
4dfbf869 50 parse_qs,
dca3ff4a 51 qualities,
3995d37d 52 remove_start,
cf7e015f 53 smuggle_url,
dbdaaa23 54 str_or_none,
c93d53f5 55 str_to_int,
f3aa3c3f 56 strftime_or_none,
7c365c21 57 traverse_obj,
556dbe7f 58 try_get,
c5e8d7af
PH
59 unescapeHTML,
60 unified_strdate,
f0d785d3 61 unified_timestamp,
cf7e015f 62 unsmuggle_url,
8bdd16b4 63 update_url_query,
21c340b8 64 url_or_none,
fe93e2c4 65 urljoin,
7c365c21 66 variadic,
c5e8d7af
PH
67)
68
# Any client whose name starts with "_" cannot be explicitly requested by the user.
#
# Maps a client name to its default ytcfg. Entries that omit
# INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER receive default
# values for them in build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}
241
242
e7870111
D
243def _split_innertube_client(client_name):
244 variant, *base = client_name.rsplit('.', 1)
245 if base:
246 return variant, base[0], variant
247 base, *variant = client_name.split('_', 1)
248 return client_name, base, variant[0] if variant else None
249
250
def build_innertube_clients():
    """
    Complete every entry of INNERTUBE_CLIENTS in place.

    Fills in default API key, host, REQUIRE_JS_PLAYER and 'hl', assigns a
    'priority' to each client, and registers a "<client>_embedscreen" copy
    for every client that has no variant suffix.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])

    # Iterate over a snapshot, since new entries are inserted during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg.setdefault('REQUIRE_JS_PLAYER', True)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
277
278
279build_innertube_clients()
280
281
class BadgeType(enum.Enum):
    """Badge kinds reported by YoutubeBaseInfoExtractor._extract_badges()."""
    # Availability of the item the badge is attached to
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    # Mapped from the 'BADGE_STYLE_TYPE_LIVE_NOW' style / 'live' label
    LIVE_NOW = enum.auto()
289
290
de7f3446 291class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 292 """Provide base functions for Youtube extractors"""
e00eb564 293
3462ffa8 294 _RESERVED_NAMES = (
08e29b9f 295 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 296 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 297 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 298 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 299
3619f78d 300 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
301
52efa4b3 302 # _NETRC_MACHINE = 'youtube'
3619f78d 303
b2e8bc1b
JMF
304 # If True it will raise an error if no login info is provided
305 _LOGIN_REQUIRED = False
306
d9190e44
RH
307 _INVIDIOUS_SITES = (
308 # invidious-redirect websites
309 r'(?:www\.)?redirect\.invidious\.io',
310 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 311 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
312 r'(?:www\.)?invidious\.pussthecat\.org',
313 r'(?:www\.)?invidious\.zee\.li',
314 r'(?:www\.)?invidious\.ethibox\.fr',
315 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
316 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
317 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
318 # youtube-dl invidious instances list
319 r'(?:(?:www|no)\.)?invidiou\.sh',
320 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
321 r'(?:www\.)?invidious\.kabi\.tk',
322 r'(?:www\.)?invidious\.mastodon\.host',
323 r'(?:www\.)?invidious\.zapashcanon\.fr',
324 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
325 r'(?:www\.)?invidious\.tinfoil-hat\.net',
326 r'(?:www\.)?invidious\.himiko\.cloud',
327 r'(?:www\.)?invidious\.reallyancient\.tech',
328 r'(?:www\.)?invidious\.tube',
329 r'(?:www\.)?invidiou\.site',
330 r'(?:www\.)?invidious\.site',
331 r'(?:www\.)?invidious\.xyz',
332 r'(?:www\.)?invidious\.nixnet\.xyz',
333 r'(?:www\.)?invidious\.048596\.xyz',
334 r'(?:www\.)?invidious\.drycat\.fr',
335 r'(?:www\.)?inv\.skyn3t\.in',
336 r'(?:www\.)?tube\.poal\.co',
337 r'(?:www\.)?tube\.connect\.cafe',
338 r'(?:www\.)?vid\.wxzm\.sx',
339 r'(?:www\.)?vid\.mint\.lgbt',
340 r'(?:www\.)?vid\.puffyan\.us',
341 r'(?:www\.)?yewtu\.be',
342 r'(?:www\.)?yt\.elukerio\.org',
343 r'(?:www\.)?yt\.lelux\.fi',
344 r'(?:www\.)?invidious\.ggc-project\.de',
345 r'(?:www\.)?yt\.maisputain\.ovh',
346 r'(?:www\.)?ytprivate\.com',
347 r'(?:www\.)?invidious\.13ad\.de',
348 r'(?:www\.)?invidious\.toot\.koeln',
349 r'(?:www\.)?invidious\.fdn\.fr',
350 r'(?:www\.)?watch\.nettohikari\.com',
351 r'(?:www\.)?invidious\.namazso\.eu',
352 r'(?:www\.)?invidious\.silkky\.cloud',
353 r'(?:www\.)?invidious\.exonip\.de',
354 r'(?:www\.)?invidious\.riverside\.rocks',
355 r'(?:www\.)?invidious\.blamefran\.net',
356 r'(?:www\.)?invidious\.moomoo\.de',
357 r'(?:www\.)?ytb\.trom\.tf',
358 r'(?:www\.)?yt\.cyberhost\.uk',
359 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
360 r'(?:www\.)?qklhadlycap4cnod\.onion',
361 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
362 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
363 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
364 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
365 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
366 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
367 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
368 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
369 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
370 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
371 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
372 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 373 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 374 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 375 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
376 r'(?:www\.)?watch\.whatever\.social',
377 r'(?:www\.)?piped\.garudalinux\.org',
378 r'(?:www\.)?piped\.rivo\.lol',
379 r'(?:www\.)?piped-libre\.kavin\.rocks',
380 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 381 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
382 r'(?:www\.)?il\.ax',
383 r'(?:www\.)?piped\.esmailelbob\.xyz',
384 r'(?:www\.)?piped\.projectsegfau\.lt',
385 r'(?:www\.)?piped\.privacydev\.net',
386 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
387 r'(?:www\.)?piped\.smnz\.de',
388 r'(?:www\.)?piped\.adminforge\.de',
389 r'(?:www\.)?watch\.whatevertinfoil\.de',
390 r'(?:www\.)?piped\.qdi\.fi',
bc87dac7
B
391 r'(?:www\.)?piped\.video',
392 r'(?:www\.)?piped\.aeong\.one',
d9190e44
RH
393 )
394
c26f9b99 395 # extracted from account/account_menu ep
396 # XXX: These are the supported YouTube UI and API languages,
397 # which is slightly different from languages supported for translation in YouTube studio
398 _SUPPORTED_LANG_CODES = [
399 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
400 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
401 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
402 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
403 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
404 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
405 ]
406
a057779d 407 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
408
@functools.cached_property
def _preferred_lang(self):
    """
    Language code taken from the "lang" extractor argument.

    Returns None if no preferred language is set.
    Raises ExtractorError if the code is not listed in _SUPPORTED_LANG_CODES
    (the comparison is case-sensitive).
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
426
def _initialize_consent(self):
    """
    Set a "YES" CONSENT cookie for .youtube.com unless one is unnecessary.

    Nothing is done when a __Secure-3PSID cookie exists or the CONSENT cookie
    already contains "YES". The id from a "PENDING+<id>" value is reused when
    present; otherwise a random three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 441
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' and 'tz' are forced.

    Existing PREF values are kept when the cookie can be parsed; 'hl' becomes
    the preferred language (or 'en') and 'tz' becomes 'UTC'.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
f3aa3c3f 453
def _real_initialize(self):
    """Initialize the PREF and CONSENT cookies, then check the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    # May raise (via raise_login_required) when _LOGIN_REQUIRED is set and no cookies were passed
    self._check_login_required()
458
459 def _check_login_required(self):
24146491 460 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 461 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 462
b7c47b74 463 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
464 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 465
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    default_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_cfg)
109dd3b2 468
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 471
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default ytcfg of `default_client`
    when the first lookup yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    fallback_cfg = self._get_default_ytcfg(default_client)
    return try_get(fallback_cfg, getter, expected_type)
476
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default ytcfg of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 481
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default ytcfg of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 486
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname.

    Precedence: the "innertube_host" extractor argument, then
    `req_api_hostname`, then the host of `default_client` (or 'web').
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
490
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the default ytcfg of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)
109dd3b2 493
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict from `ytcfg` (or the default ytcfg),
    with language and timezone fields of its 'client' section overridden.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = self._preferred_lang or 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
501
cf87314d 502 _SAPISID = None
503
109dd3b2 504 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 505 time_now = round(time.time())
cf87314d 506 if self._SAPISID is None:
507 yt_cookies = self._get_cookies('https://www.youtube.com')
508 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
509 # See: https://github.com/yt-dlp/yt-dlp/issues/393
510 sapisid_cookie = dict_get(
511 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
512 if sapisid_cookie and sapisid_cookie.value:
513 self._SAPISID = sapisid_cookie.value
514 self.write_debug('Extracted SAPISID cookie')
515 # SAPISID cookie is required if not already present
516 if not yt_cookies.get('SAPISID'):
517 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
518 self._set_cookie(
519 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
520 else:
521 self._SAPISID = False
522 if not self._SAPISID:
523 return None
1974e99f 524 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
525 sapisidhash = hashlib.sha1(
86e5f3ed 526 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 527 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
528
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (plus a context) as JSON to the /youtubei/v1/<ep> endpoint
    and return the result of _download_json().

    The "innertube_key" extractor argument takes precedence over `api_key`,
    which takes precedence over the key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    key = configured_key or api_key or self._extract_api_key(default_client=default_client)
    host = self._select_api_hostname(api_hostname, default_client)

    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})
f4f751af 546
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData and return it parsed."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 549
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX among `data` that converts to an int, else None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
560
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return ID_TOKEN from `ytcfg` if present, else search for it in `webpage`; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
571
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 590
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
ac56cf38 600
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated (computed once and cached)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
604
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`; returns {} when missing or unparsable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False) or {}
612
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments win over values found in `ytcfg`, which in
    turn fall back to the defaults of `default_client` where applicable.
    The result is passed through filter_dict() before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    auth_user = session_index if session_index is not None else self._extract_session_index(ytcfg)
    if auth_user is not None:
        headers['X-Goog-AuthUser'] = auth_user
    elif account_syncid:
        # A sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return filter_dict(headers)
29f7c58a 638
def _download_ytcfg(self, client, video_id):
    """
    Download the config page for `client` and return its ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a page; {} is returned
    for any other client or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    page = self._download_webpage(
        page_url, video_id, fatal=False,
        note=f'Downloading {client.replace("_", " ").strip()} client config')
    return self.extract_ytcfg(video_id, page) or {}
650
2d6659b9 651 @staticmethod
652 def _build_api_continuation_query(continuation, ctp=None):
653 query = {
654 'continuation': continuation
655 }
656 # TODO: Inconsistency with clickTrackingParams.
657 # Currently we have a fixed ctp contained within context (from ytcfg)
658 # and a ctp in root query for continuation.
659 if ctp:
660 query['clickTracking'] = {'clickTrackingParams': ctp}
661 return query
662
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when no continuation token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 675
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when the argument is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 685
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the first
    usable continuationItemRenderer endpoint is used.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 696
@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'] that has both a type and a text."""
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in filter(lambda candidate: isinstance(candidate, dict), alert_entries):
        for alert in entry.values():
            kind = alert.get('type')
            if not kind:
                continue
            text = cls._get_text(alert, 'text')
            if text:
                yield kind, text
709
c0ac49bc 710 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 711 errors, warnings = [], []
109dd3b2 712 for alert_type, alert_message in alerts:
641ad5d8 713 if alert_type.lower() == 'error' and fatal:
109dd3b2 714 errors.append([alert_type, alert_message])
a057779d 715 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 716 warnings.append([alert_type, alert_message])
717
718 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 719 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 720 if errors:
721 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
722
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts() with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
725
def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` as a list of {'type': BadgeType} dicts.

    Each metadataBadgeRenderer is classified by its icon type first, then by
    its style. If neither is recognised, the (lower-cased) label text is
    matched against known English substrings; at most one entry is added
    per badge, and badges that match nothing are skipped.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label (badge_type is empty here)
                # and stop at the first match so a badge is never counted twice
                badges.append({'type': label_badge_type})
                break

    return badges
765
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has the given 'type'."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))
769
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths.

    For each candidate object, 'simpleText' is preferred; otherwise the
    'text' fields of its 'runs' (or of the object itself when it is a list)
    are joined, limited to the first `max_runs` runs if given.
    Without any path, `data` itself is examined. Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path yields a single object rather than a list of results
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
47193e02 791
def _get_count(self, data, *path_list):
    """
    Read a count (e.g. views) from the text found in `data` at one of `path_list`.

    `parse_count` is tried on the text first; if it gives nothing, the leading run of
    digits/commas of the whitespace-stripped text is converted with `str_to_int`.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
799
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Build the list of thumbnail dicts ('url', 'height', 'width') found in `data`
    @param path_list: paths leading to the level that holds the 'thumbnails' key;
                      when empty, `data` itself is expected to hold it
    """
    def iter_thumbnails():
        for path in path_list or [()]:
            for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
                entry_url = url_or_none(entry.get('url'))
                if not entry_url:
                    continue
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in entry_url:
                    entry_url = entry_url.split('?')[0]
                yield {
                    'url': entry_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                }

    return list(iter_thumbnails())
823
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time inside a string and convert it with `datetime_from_str`
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns: the converted value, or None when the text holds no relative time
              or the conversion raises ValueError
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None

    keyword = mobj.group('start')
    if keyword:
        return datetime_from_str(keyword)

    try:
        return datetime_from_str(f'now-{mobj.group("time")}{mobj.group("unit")}')
    except ValueError:
        return None
839
def _parse_time_text(self, text):
    """
    Turn a time text into a timestamp.

    A relative time ('3 days ago') is tried first, then `unified_timestamp` on the
    whole text, then on the date-looking part picked out of the lowercased text.
    A warning is issued once if nothing worked and the preferred language is
    unset or 'en'.
    @returns: the timestamp, or None if `text` is empty or could not be parsed
    """
    if not text:
        return None

    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
f3aa3c3f 859
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through `_call_api` and return its response,
    retrying (via `self.RetryManager()`) on failures that look transient.

    @param item_id:        passed to `_call_api` as `video_id` and used when parsing error bodies
    @param query:          query passed to `_call_api`
    @param note:           note passed to `_call_api`
    @param headers:        headers passed to `_call_api`
    @param ytcfg:          used to derive the context and the API key for `default_client`
    @param check_get_keys: key(s) looked up in the response with `traverse_obj`; a falsy
                           lookup result is treated as incomplete data and retried
    @param fatal:          forwarded to `_error_or_warning` for errors that are not retried
    @returns: the response; or the result of `_error_or_warning` for non-retried errors.
              NOTE(review): what happens once retries are exhausted is decided by
              `RetryManager`, which is not visible here - confirm
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError and can be triaged below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                # Network problem without an HTTP response: retry
                retry.error = e
                continue

            # HTTP error: peek at the body to see whether it carries a JSON error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 911
9297939e 912 @staticmethod
913 def is_music_url(url):
914 return re.match(r'https?://music\.youtube\.com/', url) is not None
915
def _extract_video(self, renderer):
    """
    Build a 'url' type result for the video described by `renderer`.

    Metadata missing from the renderer itself (title, channel, time text) is looked up
    in the reel player header nested under its navigation endpoint.
    @returns: dict to be resolved by YoutubeIE, carrying whatever metadata could be read
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    relative_navigation_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_navigation_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        video_url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        video_url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # Live and upcoming videos report concurrent viewers rather than total views
    count_key = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    # The remaining fields are computed in the same order as they appear in the result
    ie_key = YoutubeIE.ie_key()
    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': video_url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        count_key: view_count,
        'live_status': live_status
    }
998
0c148415 999
360e1ca5 1000class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1001 IE_DESC = 'YouTube'
cb7dfeea 1002 _VALID_URL = r"""(?x)^
c5e8d7af 1003 (
edb53e2d 1004 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1005 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1006 (?:www\.)?deturl\.com/www\.youtube\.com|
1007 (?:www\.)?pwnyoutube\.com|
1008 (?:www\.)?hooktube\.com|
1009 (?:www\.)?yourepeat\.com|
1010 tube\.majestyc\.net|
1011 %(invidious)s|
1012 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1013 (?:.*?\#/)? # handle anchor (#/) redirect urls
1014 (?: # the various things that can precede the ID:
dad2210c 1015 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1016 |(?: # or the v= param in all its forms
f7000f3a 1017 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1018 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1019 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1020 v=
1021 )
f4b05232 1022 ))
cbaed4bb
S
1023 |(?:
1024 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1025 vid\.plus| # or vid.plus/xxxx
1026 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1027 %(invidious)s
cbaed4bb 1028 )/
edb53e2d 1029 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1030 )
c5e8d7af 1031 )? # all until now is optional -> you can pass the naked ID
201c1459 1032 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1033 (?(1).+)? # if we found the ID, everything can follow
9297939e 1034 (?:\#|$)""" % {
d9190e44 1035 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1036 }
7c6eb424 1037 _EMBED_REGEX = [
1038 r'''(?x)
1039 (?:
0ca0f881 1040 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1041 data-video-url=|
1042 <embed[^>]+?src=|
1043 embedSWF\(?:\s*|
1044 <object[^>]+data=|
1045 new\s+SWFObject\(
1046 )
1047 (["\'])
1048 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1049 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1050 \1''',
1051 # https://wordpress.org/plugins/lazy-load-for-videos/
1052 r'''(?xs)
1053 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1054 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1055 ]
6368e2e6 1056 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1057
e40c758c 1058 _PLAYER_INFO_RE = (
cc2db878 1059 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1060 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1061 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1062 )
2c62dc26 1063 _formats = {
c2d3cb4c 1064 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1065 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1066 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1067 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1068 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1069 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1070 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1071 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1072 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1073 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1074 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1075 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1076 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1077 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1078 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1079 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1080 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1081 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1082
1083
1084 # 3D videos
c2d3cb4c 1085 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1086 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1087 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1088 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1089 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1090 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1091 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1092
96fb5605 1093 # Apple HTTP Live Streaming
11f12195 1094 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1095 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1096 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1097 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1098 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1099 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1100 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1101 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1102
1103 # DASH mp4 video
d23028a8
S
1104 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1106 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1107 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1109 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1110 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1111 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1112 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1113 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1114 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1115 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1116
f6f1fc92 1117 # Dash mp4 audio
d23028a8
S
1118 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1119 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1120 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1121 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1122 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1123 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1124 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1125
1126 # Dash webm
d23028a8
S
1127 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1131 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1132 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1133 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1134 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1139 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1140 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1142 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1143 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1144 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1145 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1146 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1147 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1148 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1149
1150 # Dash webm audio
d23028a8
S
1151 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1152 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1153
0857baad 1154 # Dash webm audio with opus inside
d23028a8
S
1155 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1156 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1157 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1158
ce6b9a2d
PH
1159 # RTMP (unnamed)
1160 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1161
1162 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1163 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1164 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1165 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1166 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1167 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1168 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1169 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1170 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1171 }
29f7c58a 1172 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1173
fd5c4aab
S
1174 _GEO_BYPASS = False
1175
78caa52a 1176 IE_NAME = 'youtube'
2eb88d95
PH
1177 _TESTS = [
1178 {
2d3d2997 1179 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1180 'info_dict': {
1181 'id': 'BaW_jenozKc',
1182 'ext': 'mp4',
3867038a 1183 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1184 'uploader': 'Philipp Hagemeister',
1185 'uploader_id': 'phihag',
ec85ded8 1186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1187 'channel': 'Philipp Hagemeister',
dd4c4492
S
1188 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1189 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1190 'upload_date': '20121002',
ff9f925b 1191 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1192 'categories': ['Science & Technology'],
3867038a 1193 'tags': ['youtube-dl'],
556dbe7f 1194 'duration': 10,
dbdaaa23 1195 'view_count': int,
3e7c1224 1196 'like_count': int,
ff9f925b 1197 'availability': 'public',
1198 'playable_in_embed': True,
1199 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1200 'live_status': 'not_live',
1201 'age_limit': 0,
7c80519c 1202 'start_time': 1,
297a564b 1203 'end_time': 9,
12a1b225 1204 'comment_count': int,
6c73052c 1205 'channel_follower_count': int
2eb88d95 1206 }
0e853ca4 1207 },
fccd3771 1208 {
4bc3a23e
PH
1209 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1210 'note': 'Embed-only video (#1746)',
1211 'info_dict': {
1212 'id': 'yZIXLfi8CZQ',
1213 'ext': 'mp4',
1214 'upload_date': '20120608',
1215 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1216 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1217 'uploader': 'SET India',
94bfcd23 1218 'uploader_id': 'setindia',
ec85ded8 1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1220 'age_limit': 18,
545cc85d 1221 },
1222 'skip': 'Private video',
fccd3771 1223 },
11b56058 1224 {
8bdd16b4 1225 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1226 'note': 'Use the first video ID in the URL',
1227 'info_dict': {
1228 'id': 'BaW_jenozKc',
1229 'ext': 'mp4',
3867038a 1230 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1231 'uploader': 'Philipp Hagemeister',
1232 'uploader_id': 'phihag',
ec85ded8 1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1234 'channel': 'Philipp Hagemeister',
1235 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1236 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1237 'upload_date': '20121002',
976ae3ea 1238 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1239 'categories': ['Science & Technology'],
3867038a 1240 'tags': ['youtube-dl'],
556dbe7f 1241 'duration': 10,
dbdaaa23 1242 'view_count': int,
11b56058 1243 'like_count': int,
976ae3ea 1244 'availability': 'public',
1245 'playable_in_embed': True,
1246 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1247 'live_status': 'not_live',
1248 'age_limit': 0,
12a1b225 1249 'comment_count': int,
6c73052c 1250 'channel_follower_count': int
34a7de29
S
1251 },
1252 'params': {
1253 'skip_download': True,
1254 },
11b56058 1255 },
dd27fd17 1256 {
2d3d2997 1257 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1258 'note': '256k DASH audio (format 141) via DASH manifest',
1259 'info_dict': {
1260 'id': 'a9LDPn-MO4I',
1261 'ext': 'm4a',
1262 'upload_date': '20121002',
1263 'uploader_id': '8KVIDEO',
ec85ded8 1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1265 'description': '',
1266 'uploader': '8KVIDEO',
1267 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1268 },
4bc3a23e
PH
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141',
4919603f 1272 },
de3c7fe0 1273 'skip': 'format 141 not served anymore',
dd27fd17 1274 },
8bdd16b4 1275 # DASH manifest with encrypted signature
1276 {
1277 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1278 'info_dict': {
1279 'id': 'IB3lcPjvWLA',
1280 'ext': 'm4a',
1281 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1282 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1283 'duration': 244,
1284 'uploader': 'AfrojackVEVO',
1285 'uploader_id': 'AfrojackVEVO',
1286 'upload_date': '20131011',
cc2db878 1287 'abr': 129.495,
976ae3ea 1288 'like_count': int,
1289 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1290 'playable_in_embed': True,
1291 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1292 'view_count': int,
1293 'track': 'The Spark',
1294 'live_status': 'not_live',
1295 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1296 'channel': 'Afrojack',
1297 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1298 'tags': 'count:19',
1299 'availability': 'public',
1300 'categories': ['Music'],
1301 'age_limit': 0,
1302 'alt_title': 'The Spark',
6c73052c 1303 'channel_follower_count': int
8bdd16b4 1304 },
1305 'params': {
1306 'youtube_include_dash_manifest': True,
1307 'format': '141/bestaudio[ext=m4a]',
1308 },
1309 },
65c2fde2 1310 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1311 {
65c2fde2 1312 'note': 'Embed allowed age-gate video',
2d3d2997 1313 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1314 'info_dict': {
1315 'id': 'HtVdAasjOgU',
1316 'ext': 'mp4',
1317 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1318 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1319 'duration': 142,
c522adb1
JMF
1320 'uploader': 'The Witcher',
1321 'uploader_id': 'WitcherGame',
ec85ded8 1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1323 'upload_date': '20140605',
34952f09 1324 'age_limit': 18,
976ae3ea 1325 'categories': ['Gaming'],
1326 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1327 'availability': 'needs_auth',
1328 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1329 'like_count': int,
1330 'channel': 'The Witcher',
1331 'live_status': 'not_live',
1332 'tags': 'count:17',
1333 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1334 'playable_in_embed': True,
1335 'view_count': int,
6c73052c 1336 'channel_follower_count': int
c522adb1
JMF
1337 },
1338 },
65c2fde2 1339 {
1340 'note': 'Age-gate video with embed allowed in public site',
1341 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1342 'info_dict': {
1343 'id': 'HsUATh_Nc2U',
1344 'ext': 'mp4',
1345 'title': 'Godzilla 2 (Official Video)',
1346 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1347 'upload_date': '20200408',
1348 'uploader_id': 'FlyingKitty900',
1349 'uploader': 'FlyingKitty',
1350 'age_limit': 18,
976ae3ea 1351 'availability': 'needs_auth',
1352 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1353 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1354 'channel': 'FlyingKitty',
1355 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1356 'view_count': int,
1357 'categories': ['Entertainment'],
1358 'live_status': 'not_live',
1359 'tags': ['Flyingkitty', 'godzilla 2'],
1360 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1361 'like_count': int,
1362 'duration': 177,
1363 'playable_in_embed': True,
6c73052c 1364 'channel_follower_count': int
65c2fde2 1365 },
1366 },
1367 {
1368 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1369 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1370 'info_dict': {
1371 'id': 'Tq92D6wQ1mg',
1372 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1373 'ext': 'mp4',
17322130 1374 'upload_date': '20191228',
65c2fde2 1375 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1376 'uploader': 'Projekt Melody',
1377 'description': 'md5:17eccca93a786d51bc67646756894066',
1378 'age_limit': 18,
976ae3ea 1379 'like_count': int,
1380 'availability': 'needs_auth',
1381 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1382 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1383 'view_count': int,
1384 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1385 'channel': 'Projekt Melody',
1386 'live_status': 'not_live',
1387 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1388 'playable_in_embed': True,
1389 'categories': ['Entertainment'],
1390 'duration': 106,
1391 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1392 'comment_count': int,
6c73052c 1393 'channel_follower_count': int
65c2fde2 1394 },
1395 },
1396 {
1397 'note': 'Non-Agegated non-embeddable video',
1398 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1399 'info_dict': {
1400 'id': 'MeJVWBSsPAY',
1401 'ext': 'mp4',
1402 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1403 'uploader': 'Herr Lurik',
1404 'uploader_id': 'st3in234',
1405 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1406 'upload_date': '20130730',
976ae3ea 1407 'track': 'Such mich find mich',
1408 'age_limit': 0,
1409 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1410 'like_count': int,
1411 'playable_in_embed': False,
1412 'creator': 'OOMPH!',
1413 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1414 'view_count': int,
1415 'alt_title': 'Such mich find mich',
1416 'duration': 210,
1417 'channel': 'Herr Lurik',
1418 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1419 'categories': ['Music'],
1420 'availability': 'public',
1421 'uploader_url': 'http://www.youtube.com/user/st3in234',
1422 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1423 'live_status': 'not_live',
1424 'artist': 'OOMPH!',
6c73052c 1425 'channel_follower_count': int
65c2fde2 1426 },
1427 },
1428 {
1429 'note': 'Non-bypassable age-gated video',
1430 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1431 'only_matching': True,
1432 },
8bdd16b4 1433 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1434 # YouTube Red ad is not captured for creator
1435 {
1436 'url': '__2ABJjxzNo',
1437 'info_dict': {
1438 'id': '__2ABJjxzNo',
1439 'ext': 'mp4',
1440 'duration': 266,
1441 'upload_date': '20100430',
1442 'uploader_id': 'deadmau5',
1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1444 'creator': 'deadmau5',
1445 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1446 'uploader': 'deadmau5',
1447 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1448 'alt_title': 'Some Chords',
976ae3ea 1449 'availability': 'public',
1450 'tags': 'count:14',
1451 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1452 'view_count': int,
1453 'live_status': 'not_live',
1454 'channel': 'deadmau5',
1455 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1456 'like_count': int,
1457 'track': 'Some Chords',
1458 'artist': 'deadmau5',
1459 'playable_in_embed': True,
1460 'age_limit': 0,
1461 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1462 'categories': ['Music'],
1463 'album': 'Some Chords',
6c73052c 1464 'channel_follower_count': int
8bdd16b4 1465 },
1466 'expected_warnings': [
1467 'DASH manifest missing',
1468 ]
1469 },
067aa17e 1470 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1471 {
1472 'url': 'lqQg6PlCWgI',
1473 'info_dict': {
1474 'id': 'lqQg6PlCWgI',
1475 'ext': 'mp4',
556dbe7f 1476 'duration': 6085,
90227264 1477 'upload_date': '20150827',
cbe2bd91 1478 'uploader_id': 'olympic',
ec85ded8 1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1480 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1481 'uploader': 'Olympics',
cbe2bd91 1482 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1483 'like_count': int,
1484 'release_timestamp': 1343767800,
1485 'playable_in_embed': True,
1486 'categories': ['Sports'],
1487 'release_date': '20120731',
1488 'channel': 'Olympics',
1489 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1490 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1491 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1492 'age_limit': 0,
1493 'availability': 'public',
1494 'live_status': 'was_live',
1495 'view_count': int,
1496 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1497 'channel_follower_count': int
cbe2bd91
PH
1498 },
1499 'params': {
1500 'skip_download': 'requires avconv',
e52a40ab 1501 }
cbe2bd91 1502 },
6271f1ca
PH
1503 # Non-square pixels
1504 {
1505 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1506 'info_dict': {
1507 'id': '_b-2C3KPAM0',
1508 'ext': 'mp4',
1509 'stretched_ratio': 16 / 9.,
556dbe7f 1510 'duration': 85,
6271f1ca
PH
1511 'upload_date': '20110310',
1512 'uploader_id': 'AllenMeow',
ec85ded8 1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1514 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1515 'uploader': '孫ᄋᄅ',
6271f1ca 1516 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1517 'playable_in_embed': True,
1518 'channel': '孫ᄋᄅ',
1519 'age_limit': 0,
1520 'tags': 'count:11',
1521 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1522 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1523 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1524 'view_count': int,
1525 'categories': ['People & Blogs'],
1526 'like_count': int,
1527 'live_status': 'not_live',
1528 'availability': 'unlisted',
12a1b225 1529 'comment_count': int,
6c73052c 1530 'channel_follower_count': int
6271f1ca 1531 },
06b491eb
S
1532 },
1533 # url_encoded_fmt_stream_map is empty string
1534 {
1535 'url': 'qEJwOuvDf7I',
1536 'info_dict': {
1537 'id': 'qEJwOuvDf7I',
f57b7835 1538 'ext': 'webm',
06b491eb
S
1539 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1540 'description': '',
1541 'upload_date': '20150404',
1542 'uploader_id': 'spbelect',
1543 'uploader': 'Наблюдатели Петербурга',
1544 },
1545 'params': {
1546 'skip_download': 'requires avconv',
e323cf3f
S
1547 },
1548 'skip': 'This live event has ended.',
06b491eb 1549 },
067aa17e 1550 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1551 {
1552 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1553 'info_dict': {
1554 'id': 'FIl7x6_3R5Y',
eb6793ba 1555 'ext': 'webm',
da77d856
S
1556 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1557 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1558 'duration': 220,
da77d856
S
1559 'upload_date': '20150625',
1560 'uploader_id': 'dorappi2000',
ec85ded8 1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1562 'uploader': 'dorappi2000',
eb6793ba 1563 'formats': 'mincount:31',
da77d856 1564 },
eb6793ba 1565 'skip': 'not actual anymore',
2ee8f5d8 1566 },
8a1a26ce
YCH
1567 # DASH manifest with segment_list
1568 {
1569 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1570 'md5': '8ce563a1d667b599d21064e982ab9e31',
1571 'info_dict': {
1572 'id': 'CsmdDsKjzN8',
1573 'ext': 'mp4',
17ee98e1 1574 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1575 'uploader': 'Airtek',
1576 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1577 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1578 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1579 },
1580 'params': {
1581 'youtube_include_dash_manifest': True,
1582 'format': '135', # bestvideo
be49068d
S
1583 },
1584 'skip': 'This live event has ended.',
2ee8f5d8 1585 },
cf7e015f 1586 {
6368e2e6 1587 # Multifeed video (multiple cameras); the URL may point to any of the cameras
1588 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1589 'info_dict': {
6368e2e6 1590 'id': 'zaPI8MvL8pg',
1591 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1592 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1593 },
1594 'playlist': [{
1595 'info_dict': {
6368e2e6 1596 'id': 'j5yGuxZ8lLU',
cf7e015f 1597 'ext': 'mp4',
6368e2e6 1598 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1599 'uploader': 'WiiLikeToPlay',
1600 'description': 'md5:563ccbc698b39298481ca3c571169519',
1601 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1602 'duration': 10120,
1603 'channel_follower_count': int,
1604 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1605 'availability': 'public',
1606 'playable_in_embed': True,
1607 'upload_date': '20131105',
1608 'uploader_id': 'WiiRikeToPray',
1609 'categories': ['Gaming'],
1610 'live_status': 'was_live',
1611 'tags': 'count:24',
1612 'release_timestamp': 1383701910,
1613 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1614 'comment_count': int,
1615 'age_limit': 0,
1616 'like_count': int,
1617 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1618 'channel': 'WiiLikeToPlay',
1619 'view_count': int,
1620 'release_date': '20131106',
cf7e015f
S
1621 },
1622 }, {
1623 'info_dict': {
6368e2e6 1624 'id': 'zaPI8MvL8pg',
cf7e015f 1625 'ext': 'mp4',
6368e2e6 1626 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1627 'uploader_id': 'WiiRikeToPray',
1628 'availability': 'public',
1629 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1630 'channel': 'WiiLikeToPlay',
1631 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1632 'channel_follower_count': int,
1633 'description': 'md5:563ccbc698b39298481ca3c571169519',
1634 'duration': 10108,
1635 'age_limit': 0,
1636 'like_count': int,
1637 'tags': 'count:24',
1638 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1639 'uploader': 'WiiLikeToPlay',
1640 'release_timestamp': 1383701915,
1641 'comment_count': int,
1642 'upload_date': '20131105',
1643 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1644 'release_date': '20131106',
1645 'playable_in_embed': True,
1646 'live_status': 'was_live',
1647 'categories': ['Gaming'],
1648 'view_count': int,
cf7e015f
S
1649 },
1650 }, {
1651 'info_dict': {
6368e2e6 1652 'id': 'R7r3vfO7Hao',
cf7e015f 1653 'ext': 'mp4',
6368e2e6 1654 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1655 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1656 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1657 'like_count': int,
1658 'availability': 'public',
1659 'playable_in_embed': True,
1660 'upload_date': '20131105',
1661 'description': 'md5:563ccbc698b39298481ca3c571169519',
1662 'uploader_id': 'WiiRikeToPray',
1663 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1664 'channel_follower_count': int,
1665 'tags': 'count:24',
1666 'release_date': '20131106',
1667 'uploader': 'WiiLikeToPlay',
1668 'comment_count': int,
1669 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1670 'channel': 'WiiLikeToPlay',
1671 'categories': ['Gaming'],
1672 'release_timestamp': 1383701914,
1673 'live_status': 'was_live',
1674 'age_limit': 0,
1675 'duration': 10128,
1676 'view_count': int,
cf7e015f
S
1677 },
1678 }],
6368e2e6 1679 'params': {'skip_download': True},
cbaed4bb 1680 },
f9f49d87 1681 {
067aa17e 1682 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1683 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1684 'info_dict': {
1685 'id': 'gVfLd0zydlo',
1686 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1687 },
1688 'playlist_count': 2,
be49068d 1689 'skip': 'Not multifeed anymore',
f9f49d87 1690 },
cbaed4bb 1691 {
2d3d2997 1692 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1693 'only_matching': True,
0e49d9a6 1694 },
6d4fc66b 1695 {
2d3d2997 1696 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1697 'only_matching': True,
1698 },
0e49d9a6 1699 {
067aa17e 1700 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1701 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1702 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1703 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1704 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1705 'info_dict': {
1706 'id': 'lsguqyKfVQg',
1707 'ext': 'mp4',
1708 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1709 'alt_title': 'Dark Walk',
0e49d9a6 1710 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1711 'duration': 133,
0e49d9a6
LL
1712 'upload_date': '20151119',
1713 'uploader_id': 'IronSoulElf',
ec85ded8 1714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1715 'uploader': 'IronSoulElf',
11f9be09 1716 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1717 'track': 'Dark Walk',
1718 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1719 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1720 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1721 'categories': ['Film & Animation'],
1722 'view_count': int,
1723 'live_status': 'not_live',
1724 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1725 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1726 'tags': 'count:13',
1727 'availability': 'public',
1728 'channel': 'IronSoulElf',
1729 'playable_in_embed': True,
1730 'like_count': int,
1731 'age_limit': 0,
6c73052c 1732 'channel_follower_count': int
0e49d9a6
LL
1733 },
1734 'params': {
1735 'skip_download': True,
1736 },
1737 },
61f92af1 1738 {
067aa17e 1739 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1740 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1741 'only_matching': True,
1742 },
313dfc45
LL
1743 {
1744 # Video with yt:stretch=17:0
1745 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1746 'info_dict': {
1747 'id': 'Q39EVAstoRM',
1748 'ext': 'mp4',
1749 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1750 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1751 'upload_date': '20151107',
1752 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1753 'uploader': 'CH GAMER DROID',
1754 },
1755 'params': {
1756 'skip_download': True,
1757 },
be49068d 1758 'skip': 'This video does not exist.',
313dfc45 1759 },
201c1459 1760 {
1761 # Video with incomplete 'yt:stretch=16:'
1762 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1763 'only_matching': True,
1764 },
7caf9830
S
1765 {
1766 # Video licensed under Creative Commons
1767 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1768 'info_dict': {
1769 'id': 'M4gD1WSo5mA',
1770 'ext': 'mp4',
1771 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1772 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1773 'duration': 721,
17322130 1774 'upload_date': '20150128',
7caf9830 1775 'uploader_id': 'BerkmanCenter',
ec85ded8 1776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1777 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1778 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1779 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1780 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1781 'like_count': int,
1782 'age_limit': 0,
1783 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1784 'channel': 'The Berkman Klein Center for Internet & Society',
1785 'availability': 'public',
1786 'view_count': int,
1787 'categories': ['Education'],
1788 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1789 'live_status': 'not_live',
1790 'playable_in_embed': True,
12a1b225 1791 'comment_count': int,
d5d1df8a 1792 'channel_follower_count': int,
1793 'chapters': list,
7caf9830
S
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 },
fd050249
S
1799 {
1800 # Channel-like uploader_url
1801 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1802 'info_dict': {
1803 'id': 'eQcmzGIKrzg',
1804 'ext': 'mp4',
1805 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1806 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1807 'duration': 4060,
17322130 1808 'upload_date': '20151120',
eb6793ba 1809 'uploader': 'Bernie Sanders',
fd050249 1810 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1812 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1813 'playable_in_embed': True,
1814 'tags': 'count:12',
1815 'like_count': int,
1816 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1817 'age_limit': 0,
1818 'availability': 'public',
1819 'categories': ['News & Politics'],
1820 'channel': 'Bernie Sanders',
1821 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1822 'view_count': int,
1823 'live_status': 'not_live',
1824 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1825 'comment_count': int,
d5d1df8a 1826 'channel_follower_count': int,
1827 'chapters': list,
fd050249
S
1828 },
1829 'params': {
1830 'skip_download': True,
1831 },
1832 },
040ac686
S
1833 {
1834 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1835 'only_matching': True,
7f29cf54
S
1836 },
1837 {
067aa17e 1838 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1839 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1840 'only_matching': True,
6496ccb4
S
1841 },
1842 {
1843 # Rental video preview
1844 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1845 'info_dict': {
1846 'id': 'uGpuVWrhIzE',
1847 'ext': 'mp4',
1848 'title': 'Piku - Trailer',
1849 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1850 'upload_date': '20150811',
1851 'uploader': 'FlixMatrix',
1852 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1853 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1854 'license': 'Standard YouTube License',
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
eb6793ba 1859 'skip': 'This video is not available.',
022a5d66 1860 },
12afdc2a
S
1861 {
1862 # YouTube Red video with episode data
1863 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1864 'info_dict': {
1865 'id': 'iqKdEhx-dD4',
1866 'ext': 'mp4',
1867 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1868 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1869 'duration': 2085,
12afdc2a
S
1870 'upload_date': '20170118',
1871 'uploader': 'Vsauce',
1872 'uploader_id': 'Vsauce',
1873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1874 'series': 'Mind Field',
1875 'season_number': 1,
1876 'episode_number': 1,
976ae3ea 1877 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1878 'tags': 'count:12',
1879 'view_count': int,
1880 'availability': 'public',
1881 'age_limit': 0,
1882 'channel': 'Vsauce',
1883 'episode': 'Episode 1',
1884 'categories': ['Entertainment'],
1885 'season': 'Season 1',
1886 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1887 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1888 'like_count': int,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
6c73052c 1891 'channel_follower_count': int
12afdc2a
S
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 'expected_warnings': [
1897 'Skipping DASH manifest',
1898 ],
1899 },
c7121fa7
S
1900 {
1901 # The following content has been identified by the YouTube community
1902 # as inappropriate or offensive to some audiences.
1903 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1904 'info_dict': {
1905 'id': '6SJNVb0GnPI',
1906 'ext': 'mp4',
1907 'title': 'Race Differences in Intelligence',
1908 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1909 'duration': 965,
1910 'upload_date': '20140124',
1911 'uploader': 'New Century Foundation',
1912 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1914 },
1915 'params': {
1916 'skip_download': True,
1917 },
545cc85d 1918 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1919 },
022a5d66
S
1920 {
1921 # itag 212
1922 'url': '1t24XAntNCY',
1923 'only_matching': True,
fd5c4aab
S
1924 },
1925 {
1926 # geo restricted to JP
1927 'url': 'sJL6WA-aGkQ',
1928 'only_matching': True,
1929 },
cd5a74a2
S
1930 {
1931 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1932 'only_matching': True,
1933 },
bc2ca1bb 1934 {
1935 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1936 'only_matching': True,
1937 },
1938 {
1939 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1940 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1941 'only_matching': True,
1942 },
825cd268
RA
1943 {
1944 # DRM protected
1945 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1946 'only_matching': True,
4fe54c12
S
1947 },
1948 {
1949 # Video with unsupported adaptive stream type formats
1950 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1951 'info_dict': {
1952 'id': 'Z4Vy8R84T1U',
1953 'ext': 'mp4',
1954 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1955 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1956 'duration': 433,
1957 'upload_date': '20130923',
1958 'uploader': 'Amelia Putri Harwita',
1959 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1961 'formats': 'maxcount:10',
1962 },
1963 'params': {
1964 'skip_download': True,
1965 'youtube_include_dash_manifest': False,
1966 },
5429d6a9 1967 'skip': 'not actual anymore',
5caabd3c 1968 },
1969 {
822b9d9c 1970 # YouTube Music auto-generated description
5caabd3c 1971 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1972 'info_dict': {
1973 'id': 'MgNrAu2pzNs',
1974 'ext': 'mp4',
1975 'title': 'Voyeur Girl',
1976 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1977 'upload_date': '20190312',
5429d6a9
S
1978 'uploader': 'Stephen - Topic',
1979 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1980 'artist': 'Stephen',
1981 'track': 'Voyeur Girl',
1982 'album': 'it\'s too much love to know my dear',
1983 'release_date': '20190313',
1984 'release_year': 2019,
976ae3ea 1985 'alt_title': 'Voyeur Girl',
1986 'view_count': int,
1987 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1988 'playable_in_embed': True,
1989 'like_count': int,
1990 'categories': ['Music'],
1991 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1992 'channel': 'Stephen',
1993 'availability': 'public',
1994 'creator': 'Stephen',
1995 'duration': 169,
1996 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1997 'age_limit': 0,
1998 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1999 'tags': 'count:11',
2000 'live_status': 'not_live',
6c73052c 2001 'channel_follower_count': int
5caabd3c 2002 },
2003 'params': {
2004 'skip_download': True,
2005 },
2006 },
66b48727
RA
2007 {
2008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2009 'only_matching': True,
2010 },
011e75e6
S
2011 {
2012 # invalid -> valid video id redirection
2013 'url': 'DJztXj2GPfl',
2014 'info_dict': {
2015 'id': 'DJztXj2GPfk',
2016 'ext': 'mp4',
2017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2018 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2019 'upload_date': '20090125',
2020 'uploader': 'Prochorowka',
2021 'uploader_id': 'Prochorowka',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
2023 'artist': 'Panjabi MC',
2024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2025 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2026 },
2027 'params': {
2028 'skip_download': True,
2029 },
545cc85d 2030 'skip': 'Video unavailable',
ea74e00b
DP
2031 },
2032 {
2033 # empty description results in an empty string
2034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2035 'info_dict': {
2036 'id': 'x41yOUIvK2k',
2037 'ext': 'mp4',
2038 'title': 'IMG 3456',
2039 'description': '',
2040 'upload_date': '20170613',
2041 'uploader_id': 'ElevageOrVert',
2042 'uploader': 'ElevageOrVert',
976ae3ea 2043 'view_count': int,
2044 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2045 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2046 'like_count': int,
2047 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2048 'tags': [],
2049 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2050 'availability': 'public',
2051 'age_limit': 0,
2052 'categories': ['Pets & Animals'],
2053 'duration': 7,
2054 'playable_in_embed': True,
2055 'live_status': 'not_live',
2056 'channel': 'ElevageOrVert',
6c73052c 2057 'channel_follower_count': int
ea74e00b
DP
2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
a0566bbf 2063 {
29f7c58a 2064 # with '};' inside yt initial data (see [1])
2065 # see [2] for an example with '};' inside ytInitialPlayerResponse
2066 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2067 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2068 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2069 'info_dict': {
2070 'id': 'CHqg6qOn4no',
2071 'ext': 'mp4',
2072 'title': 'Part 77 Sort a list of simple types in c#',
2073 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2074 'upload_date': '20130831',
2075 'uploader_id': 'kudvenkat',
2076 'uploader': 'kudvenkat',
976ae3ea 2077 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2078 'like_count': int,
2079 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2080 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2081 'live_status': 'not_live',
2082 'categories': ['Education'],
2083 'availability': 'public',
2084 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2085 'tags': 'count:12',
2086 'playable_in_embed': True,
2087 'age_limit': 0,
2088 'view_count': int,
2089 'duration': 522,
2090 'channel': 'kudvenkat',
12a1b225 2091 'comment_count': int,
d5d1df8a 2092 'channel_follower_count': int,
2093 'chapters': list,
a0566bbf 2094 },
2095 'params': {
2096 'skip_download': True,
2097 },
2098 },
29f7c58a 2099 {
2100 # another example of '};' in ytInitialData
2101 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2102 'only_matching': True,
2103 },
2104 {
2105 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2106 'only_matching': True,
2107 },
545cc85d 2108 {
cc2db878 2109 # https://github.com/ytdl-org/youtube-dl/pull/28094
2110 'url': 'OtqTfy26tG0',
2111 'info_dict': {
2112 'id': 'OtqTfy26tG0',
2113 'ext': 'mp4',
2114 'title': 'Burn Out',
2115 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2116 'upload_date': '20141120',
2117 'uploader': 'The Cinematic Orchestra - Topic',
2118 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2119 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2120 'artist': 'The Cinematic Orchestra',
2121 'track': 'Burn Out',
2122 'album': 'Every Day',
976ae3ea 2123 'like_count': int,
2124 'live_status': 'not_live',
2125 'alt_title': 'Burn Out',
2126 'duration': 614,
2127 'age_limit': 0,
2128 'view_count': int,
2129 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2130 'creator': 'The Cinematic Orchestra',
2131 'channel': 'The Cinematic Orchestra',
2132 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2133 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2134 'availability': 'public',
2135 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2136 'categories': ['Music'],
2137 'playable_in_embed': True,
6c73052c 2138 'channel_follower_count': int
cc2db878 2139 },
2140 'params': {
2141 'skip_download': True,
2142 },
545cc85d 2143 },
bc2ca1bb 2144 {
2145 # controversial video, only works with bpctr when authenticated with cookies
2146 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2147 'only_matching': True,
2148 },
a1a7907b 2149 {
2150 # controversial video, requires bpctr/contentCheckOk
2151 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2152 'info_dict': {
2153 'id': 'SZJvDhaSDnc',
2154 'ext': 'mp4',
2155 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2156 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2157 'uploader': 'CBS Mornings',
11f9be09 2158 'uploader_id': 'CBSThisMorning',
a1a7907b 2159 'upload_date': '20140716',
976ae3ea 2160 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2161 'duration': 170,
2162 'categories': ['News & Politics'],
2163 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2164 'view_count': int,
2165 'channel': 'CBS Mornings',
2166 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2167 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2168 'age_limit': 18,
2169 'availability': 'needs_auth',
2170 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2171 'like_count': int,
2172 'live_status': 'not_live',
2173 'playable_in_embed': True,
6c73052c 2174 'channel_follower_count': int
a1a7907b 2175 }
2176 },
f7ad7160 2177 {
2178 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2179 'url': 'cBvYw8_A0vQ',
2180 'info_dict': {
2181 'id': 'cBvYw8_A0vQ',
2182 'ext': 'mp4',
2183 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2184 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2185 'upload_date': '20201120',
2186 'uploader': 'Walk around Japan',
2187 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2189 'duration': 1456,
2190 'categories': ['Travel & Events'],
2191 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2192 'view_count': int,
2193 'channel': 'Walk around Japan',
2194 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2195 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2196 'age_limit': 0,
2197 'availability': 'public',
2198 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2199 'live_status': 'not_live',
2200 'playable_in_embed': True,
6c73052c 2201 'channel_follower_count': int
f7ad7160 2202 },
2203 'params': {
2204 'skip_download': True,
2205 },
0fb983f6 2206 }, {
2207 # Has multiple audio streams
2208 'url': 'WaOKSUlf4TM',
2209 'only_matching': True
9297939e 2210 }, {
2211 # Requires Premium: has format 141 when requested using YTM url
2212 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2213 'only_matching': True
2214 }, {
120916da 2215 # multiple subtitles with same lang_code
2216 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2217 'only_matching': True,
109dd3b2 2218 }, {
2219 # Force use of the android client fallback
2220 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2221 'info_dict': {
2222 'id': 'YOelRv7fMxY',
11f9be09 2223 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2224 'ext': '3gp',
2225 'upload_date': '20210624',
2226 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2227 'uploader': 'colinfurze',
11f9be09 2228 'uploader_id': 'colinfurze',
109dd3b2 2229 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2230 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2231 'duration': 596,
2232 'categories': ['Entertainment'],
2233 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2234 'view_count': int,
2235 'channel': 'colinfurze',
2236 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2237 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2238 'age_limit': 0,
2239 'availability': 'public',
2240 'like_count': int,
2241 'live_status': 'not_live',
2242 'playable_in_embed': True,
d5d1df8a 2243 'channel_follower_count': int,
2244 'chapters': list,
109dd3b2 2245 },
2246 'params': {
2247 'format': '17', # 3gp format available on android
2248 'extractor_args': {'youtube': {'player_client': ['android']}},
2249 },
120916da 2250 },
109dd3b2 2251 {
2252 # Skip download of additional client configs (remix client config in this case)
2253 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2254 'only_matching': True,
2255 'params': {
2256 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2257 },
8fc54b12 2258 }, {
2259 # shorts
2260 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2261 'only_matching': True,
9222c381 2262 }, {
2263 'note': 'Storyboards',
2264 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2265 'info_dict': {
2266 'id': '5KLPxDtMqe8',
2267 'ext': 'mhtml',
2268 'format_id': 'sb0',
2269 'title': 'Your Brain is Plastic',
2270 'uploader_id': 'scishow',
2271 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2272 'upload_date': '20140324',
2273 'uploader': 'SciShow',
976ae3ea 2274 'like_count': int,
2275 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2276 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2277 'view_count': int,
2278 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2279 'playable_in_embed': True,
2280 'tags': 'count:12',
2281 'uploader_url': 'http://www.youtube.com/user/scishow',
2282 'availability': 'public',
2283 'channel': 'SciShow',
2284 'live_status': 'not_live',
2285 'duration': 248,
2286 'categories': ['Education'],
2287 'age_limit': 0,
d5d1df8a 2288 'channel_follower_count': int,
2289 'chapters': list,
9222c381 2290 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2291 }, {
2292 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2293 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2294 'info_dict': {
2295 'id': '2NUZ8W2llS4',
2296 'ext': 'mp4',
2297 'title': 'The NP that test your phone performance 🙂',
2298 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2299 'uploader': 'Leon Nguyen',
2300 'uploader_id': 'VNSXIII',
2301 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2302 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2303 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2304 'duration': 21,
2305 'view_count': int,
2306 'age_limit': 0,
2307 'categories': ['Gaming'],
2308 'tags': 'count:23',
2309 'playable_in_embed': True,
2310 'live_status': 'not_live',
2311 'upload_date': '20220103',
2312 'like_count': int,
2313 'availability': 'public',
2314 'channel': 'Leon Nguyen',
2315 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2316 'comment_count': int,
992f9a73 2317 'channel_follower_count': int
2318 }
1ff88b7a 2319 }, {
2320 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2321 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2322 'info_dict': {
2323 'id': '2NUZ8W2llS4',
2324 'ext': 'mp4',
2325 'title': 'The NP that test your phone performance 🙂',
2326 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2327 'uploader': 'Leon Nguyen',
2328 'uploader_id': 'VNSXIII',
2329 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2330 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2331 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2332 'duration': 21,
2333 'view_count': int,
2334 'age_limit': 0,
2335 'categories': ['Gaming'],
2336 'tags': 'count:23',
2337 'playable_in_embed': True,
2338 'live_status': 'not_live',
2339 'upload_date': '20220102',
2340 'like_count': int,
2341 'availability': 'public',
2342 'channel': 'Leon Nguyen',
2343 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2344 'comment_count': int,
2345 'channel_follower_count': int
2346 },
2347 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2348 }, {
2349 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2350 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2351 'info_dict': {
2352 'id': 'mzZzzBU6lrM',
2353 'ext': 'mp4',
2354 'title': 'I Met GeorgeNotFound In Real Life...',
2355 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2356 'uploader': 'Quackity',
2357 'uploader_id': 'QuackityHQ',
2358 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2359 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2360 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2361 'duration': 955,
2362 'view_count': int,
2363 'age_limit': 0,
2364 'categories': ['Entertainment'],
2365 'tags': 'count:26',
2366 'playable_in_embed': True,
2367 'live_status': 'not_live',
2368 'release_timestamp': 1641172509,
2369 'release_date': '20220103',
2370 'upload_date': '20220103',
2371 'like_count': int,
2372 'availability': 'public',
2373 'channel': 'Quackity',
2374 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2375 'channel_follower_count': int
2376 }
2377 },
2378 { # continuous livestream. Microformat upload date should be preferred.
2379 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2380 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2381 'info_dict': {
2382 'id': 'kgx4WGK0oNU',
2383 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2384 'ext': 'mp4',
2385 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2386 'availability': 'public',
2387 'age_limit': 0,
2388 'release_timestamp': 1637975704,
2389 'upload_date': '20210619',
2390 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2391 'live_status': 'is_live',
2392 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2393 'uploader': '阿鲍Abao',
2394 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2395 'channel': 'Abao in Tokyo',
2396 'channel_follower_count': int,
2397 'release_date': '20211127',
2398 'tags': 'count:39',
2399 'categories': ['People & Blogs'],
2400 'like_count': int,
2401 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2402 'view_count': int,
2403 'playable_in_embed': True,
2404 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2405 'concurrent_view_count': int,
992f9a73 2406 },
2407 'params': {'skip_download': True}
6e634cbe 2408 }, {
2409 # Story. Requires specific player params to work.
ee27297f 2410 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2411 'info_dict': {
ee27297f 2412 'id': 'vv8qTUWmulI',
6e634cbe 2413 'ext': 'mp4',
ee27297f 2414 'availability': 'unlisted',
2415 'view_count': int,
2416 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2417 'upload_date': '20220526',
2418 'categories': ['Education'],
2419 'title': 'Story',
2420 'channel': 'IT\'S HISTORY',
2421 'description': '',
2422 'uploader_id': 'BlastfromthePast',
2423 'duration': 12,
2424 'uploader': 'IT\'S HISTORY',
6e634cbe 2425 'playable_in_embed': True,
6e634cbe 2426 'age_limit': 0,
6e634cbe 2427 'live_status': 'not_live',
ee27297f 2428 'tags': [],
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2430 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2431 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2432 },
2433 'skip': 'stories get removed after some period of time',
ee27297f 2434 }, {
2435 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2436 'info_dict': {
2437 'id': 'tjjjtzRLHvA',
2438 'ext': 'mp4',
2439 'title': 'ハッシュタグ無し };if window.ytcsi',
2440 'upload_date': '20220323',
2441 'like_count': int,
2442 'availability': 'unlisted',
2443 'channel': 'nao20010128nao',
2444 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2445 'age_limit': 0,
2446 'uploader': 'nao20010128nao',
2447 'uploader_id': 'nao20010128nao',
2448 'categories': ['Music'],
6e634cbe 2449 'view_count': int,
2450 'description': '',
ee27297f 2451 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2452 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2453 'live_status': 'not_live',
2454 'playable_in_embed': True,
2455 'channel_follower_count': int,
2456 'duration': 6,
2457 'tags': [],
2458 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2459 }
c26f9b99 2460 }, {
2461 # Prefer primary title+description language metadata by default
2462 # Do not prefer translated description if primary is empty
2463 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2464 'info_dict': {
2465 'id': 'el3E4MbxRqQ',
2466 'ext': 'mp4',
2467 'title': 'dlp test video 2 - primary sv no desc',
2468 'description': '',
2469 'channel': 'cole-dlp-test-acc',
2470 'tags': [],
2471 'view_count': int,
2472 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2473 'like_count': int,
2474 'playable_in_embed': True,
2475 'availability': 'unlisted',
2476 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2477 'age_limit': 0,
2478 'duration': 5,
2479 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2480 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2481 'live_status': 'not_live',
2482 'upload_date': '20220908',
2483 'categories': ['People & Blogs'],
2484 'uploader': 'cole-dlp-test-acc',
2485 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2486 },
2487 'params': {'skip_download': True}
2488 }, {
2489 # Extractor argument: prefer translated title+description
2490 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2491 'info_dict': {
2492 'id': 'gHKT4uU8Zng',
2493 'ext': 'mp4',
2494 'channel': 'cole-dlp-test-acc',
2495 'tags': [],
2496 'duration': 5,
2497 'live_status': 'not_live',
2498 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2499 'upload_date': '20220728',
2500 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2501 'view_count': int,
2502 'categories': ['People & Blogs'],
2503 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2504 'title': 'dlp test video title translated (fr)',
2505 'availability': 'public',
2506 'uploader': 'cole-dlp-test-acc',
2507 'age_limit': 0,
2508 'description': 'dlp test video description translated (fr)',
2509 'playable_in_embed': True,
2510 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2511 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2512 },
2513 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2514 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2515 }, {
2516 'note': '6 channel audio',
2517 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2518 'only_matching': True,
a4894d3e 2519 }, {
2520 'note': 'Multiple HLS formats with same itag',
2521 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2522 'info_dict': {
2523 'id': 'kX3nB4PpJko',
2524 'ext': 'mp4',
2525 'categories': ['Entertainment'],
2526 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2527 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
2528 'live_status': 'not_live',
2529 'duration': 937,
2530 'channel_follower_count': int,
2531 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2532 'title': 'Last To Take Hand Off Jet, Keeps It!',
2533 'channel': 'MrBeast',
2534 'playable_in_embed': True,
2535 'view_count': int,
2536 'upload_date': '20221112',
2537 'uploader': 'MrBeast',
2538 'uploader_id': 'MrBeast6000',
2539 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2540 'age_limit': 0,
2541 'availability': 'public',
2542 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2543 'like_count': int,
2544 'tags': [],
2545 },
2546 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2547 }, {
2548 'note': 'Audio formats with Dynamic Range Compression',
2549 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2550 'info_dict': {
2551 'id': 'Tq92D6wQ1mg',
2552 'ext': 'weba',
2553 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2554 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2555 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2556 'channel_follower_count': int,
2557 'description': 'md5:17eccca93a786d51bc67646756894066',
2558 'upload_date': '20191228',
2559 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2560 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2561 'playable_in_embed': True,
2562 'like_count': int,
2563 'categories': ['Entertainment'],
2564 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2565 'age_limit': 18,
2566 'channel': 'Projekt Melody',
2567 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2568 'view_count': int,
2569 'availability': 'needs_auth',
2570 'comment_count': int,
2571 'live_status': 'not_live',
2572 'uploader': 'Projekt Melody',
2573 'duration': 106,
2574 },
2575 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
dad2210c 2576 },
2577 {
2578 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2579 'info_dict': {
2580 'id': 'qVv6vCqciTM',
2581 'ext': 'mp4',
2582 'age_limit': 0,
2583 'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2584 'comment_count': int,
2585 'chapters': 'count:13',
2586 'upload_date': '20221223',
2587 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2588 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2589 'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2590 'like_count': int,
2591 'release_date': '20221223',
2592 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2593 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2594 'view_count': int,
2595 'playable_in_embed': True,
2596 'duration': 4438,
2597 'availability': 'public',
2598 'channel_follower_count': int,
2599 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2600 'categories': ['Entertainment'],
2601 'live_status': 'was_live',
2602 'release_timestamp': 1671793345,
2603 'channel': 'さなちゃんねる',
2604 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2605 'uploader': 'さなちゃんねる',
2606 },
2607 },
2eb88d95
PH
2608 ]
2609
f2e8dbcc 2610 _WEBPAGE_TESTS = [
2611 # YouTube <object> embed
2612 {
2613 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2614 'md5': '873c81d308b979f0e23ee7e620b312a3',
2615 'info_dict': {
2616 'id': 'msN87y-iEx0',
2617 'ext': 'mp4',
2618 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2619 'upload_date': '20080526',
2620 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2621 'uploader': 'Christopher Sykes',
2622 'uploader_id': 'ChristopherJSykes',
2623 'age_limit': 0,
2624 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2625 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2626 'playable_in_embed': True,
2627 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2628 'like_count': int,
2629 'comment_count': int,
2630 'channel': 'Christopher Sykes',
2631 'live_status': 'not_live',
2632 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2633 'availability': 'public',
2634 'duration': 195,
2635 'view_count': int,
2636 'categories': ['Science & Technology'],
2637 'channel_follower_count': int,
2638 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2639 },
2640 'params': {
2641 'skip_download': True,
2642 }
2643 },
2644 ]
2645
@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty `list` query value; otherwise defer to the parent class.

    @param url  URL string to test
    @returns    False when the first `list` query value is truthy, else
                whatever the parent class's suitable() returns
    """
    # Function-scope import, kept exactly where the original had it
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
201c1459 2654
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser and create two empty caches."""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player (keyed by player id);
    # _player_cache is filled by _cached (keyed by the cache_id tuple)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2659
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to every format flagged `is_from_start`.

    The matching format dicts are modified in place: 'is_live' and 'fragments'
    are set, 'protocol' is set for live videos, and 'is_from_start' is removed.
    Nothing is returned.
    """
    refetch_lock = threading.Lock()
    last_fetch = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once `delay` seconds have passed
        # since the previous fetch, replacing the shared formats/is_live state
        nonlocal formats, last_fetch, is_live
        if time.time() <= last_fetch + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        last_fetch = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only the refetch itself is serialised by the lock
            with refetch_lock:
                refetch_manifest(format_id, delay)

            found = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if found:
                return found['manifest_url'], found['manifest_stream_number'], is_live
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # For a non-live video a copy of the format is handed over as
        # manifestless_orig_fmt; for a live one that argument is False
        gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if is_live:
            fmt['fragments'] = gen
            fmt['protocol'] = 'http_dash_segments_generator'
        else:
            fmt['fragments'] = LazyList(gen({}))
        del fmt['is_from_start']
adbc4ec4 2707
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts ({'url', 'fragment_count'}) for a live-from-start DASH format.

    @param video_id               id used in the debug message
    @param format_id              format id passed through to mpd_feed
    @param live_start_time        start time of the stream, or a falsy value
    @param mpd_feed               callable(format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  falsy, or a format dict whose 'fragments' and
                                  'fragment_base_url' are used instead of fetching the MPD;
                                  when given, generation stops after one pass
    @param ctx                    dict; 'start' is read and 'last_error' is popped
    """
    # Seconds between polls / maximum look-back (432000 s == 120 hours)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below reads it as a free variable and may
    # return it before the main loop has assigned it for the first time
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        @returns (should_continue, last_seq)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            # A default is required: a StopIteration escaping into the
            # enclosing generator would surface as RuntimeError (PEP 479).
            # A manifest without the wanted stream is treated like a failed fetch.
            fmt_info = next(
                (x for x in fmts or () if x['manifest_stream_number'] == stream_number), None)
            if not fmt_info:
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive polls produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow; caught just below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2818
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg objects, or None.

    Looks up 'PLAYER_JS_URL' first, then 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS'. The `webpage` argument is not used here.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None
109dd3b2 2826
b6de707d 2827 def _download_player_url(self, video_id, fatal=False):
2828 res = self._download_webpage(
2829 'https://www.youtube.com/iframe_api',
2830 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2831 if res:
2832 player_version = self._search_regex(
2833 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2834 if player_version:
2835 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2836
60064c53
PH
2837 def _signature_cache_id(self, example_sig):
2838 """ Return a string representation of a signature """
14f25df2 2839 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2840
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first pattern in cls._PLAYER_INFO_RE that matches player_url.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns are tried in order and only until one matches
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 2850
404f611f 2851 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2852 player_id = self._extract_player_info(player_url)
2853 if player_id not in self._code_cache:
1276a43a 2854 code = self._download_webpage(
109dd3b2 2855 player_url, video_id, fatal=fatal,
2856 note='Downloading player ' + player_id,
2857 errnote='Download of %s failed' % player_url)
1276a43a 2858 if code:
2859 self._code_cache[player_id] = code
404f611f 2860 return self._code_cache.get(player_id)
109dd3b2 2861
e40c758c 2862 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2863 player_id = self._extract_player_info(player_url)
e0df6211 2864
c4417ddb 2865 # Read from filesystem cache
86e5f3ed 2866 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2867 assert os.path.basename(func_id) == func_id
a0e07d31 2868
ae61d108 2869 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2870 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2871
580ce007 2872 if not cache_spec:
2873 code = self._load_player(video_id, player_url)
404f611f 2874 if code:
109dd3b2 2875 res = self._parse_sig_js(code)
ac668111 2876 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2877 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2878 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2879
2880 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2881
60064c53 2882 def _print_sig_code(self, func, example_sig):
404f611f 2883 if not self.get_param('youtube_print_sig_code'):
2884 return
2885
edf3e38e
PH
2886 def gen_sig_code(idxs):
2887 def _genslice(start, end, step):
78caa52a 2888 starts = '' if start == 0 else str(start)
8bcc8756 2889 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2890 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2891 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2892
2893 step = None
7af808a5
PH
2894 # Quelch pyflakes warnings - start will be set when step is set
2895 start = '(Never used)'
edf3e38e
PH
2896 for i, prev in zip(idxs[1:], idxs[:-1]):
2897 if step is not None:
2898 if i - prev == step:
2899 continue
2900 yield _genslice(start, prev, step)
2901 step = None
2902 continue
2903 if i - prev in [-1, 1]:
2904 step = i - prev
2905 start = prev
2906 continue
2907 else:
78caa52a 2908 yield 's[%d]' % prev
edf3e38e 2909 if step is None:
78caa52a 2910 yield 's[%d]' % i
edf3e38e
PH
2911 else:
2912 yield _genslice(start, i, step)
2913
ac668111 2914 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2915 cache_res = func(test_string)
edf3e38e 2916 cache_spec = [ord(c) for c in cache_res]
78caa52a 2917 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2918 signature_id_tuple = '(%s)' % (
14f25df2 2919 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2920 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2921 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2922 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2923
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable wrapping it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    # Tried in order
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        return initial_function([s])

    return run_signature
2947
580ce007 2948 def _cached(self, func, *cache_id):
2949 def inner(*args, **kwargs):
2950 if cache_id not in self._player_cache:
2951 try:
2952 self._player_cache[cache_id] = func(*args, **kwargs)
2953 except ExtractorError as e:
2954 self._player_cache[cache_id] = e
2955 except Exception as e:
2956 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2957
2958 ret = self._player_cache[cache_id]
2959 if isinstance(ret, Exception):
2960 raise ret
2961 return ret
2962 return inner
2963
545cc85d 2964 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2965 """Turn the encrypted s field into a working signature"""
580ce007 2966 extract_sig = self._cached(
2967 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2968 func = extract_sig(video_id, player_url, s)
2969 self._print_sig_code(func, s)
2970 return func(s)
404f611f 2971
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Runs the player's n function through the built-in JS interpreter; if
    that raises JSInterpreter.Exception, the function is executed with
    PhantomJS instead. Raises ExtractorError when player_url is None or the
    function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extract_err:
        raise ExtractorError('Unable to extract nsig function code', cause=extract_err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as native_err:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # PhantomJS cannot be used: surface the interpreter's own error
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3005
def _extract_n_function_name(self, jscode):
    """Return the name of the player's n function as found in jscode.

    When the call site indexes into an array (`name[idx](...)`), the array
    literal assigned to `var name` is parsed and its idx-th entry returned.
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(idx)]
3016
def _extract_n_function_code(self, video_id, player_url):
    """Return (jsi, player_id, func_code) for the player's n function.

    func_code is taken from the 'youtube-nsig' cache section when present.
    Otherwise the player JS is loaded, the function is located by regex
    (falling back to the JS interpreter's own extraction) and the result
    is stored in the cache.

    @returns tuple of (JSInterpreter instance, player id, func_code), where
             func_code unpacks as (argument names, function body)
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which would
    # otherwise act as a regex anchor and make this pattern unmatchable
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
        # NB: The end of the regex is intentionally kept strict
        {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3042
3043 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 3044 func = jsi.extract_function_from_code(*func_code)
f6ca640b 3045
580ce007 3046 def extract_nsig(s):
25836db6 3047 try:
3048 ret = func([s])
3049 except JSInterpreter.Exception:
3050 raise
3051 except Exception as e:
3052 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3053
f6ca640b 3054 if ret.startswith('enhanced_except_'):
25836db6 3055 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 3056 return ret
580ce007 3057
3058 return extract_nsig
e0df6211 3059
109dd3b2 3060 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3061 """
3062 Extract signatureTimestamp (sts)
3063 Required to tell API what sig/player version is in use.
3064 """
3065 sts = None
3066 if isinstance(ytcfg, dict):
3067 sts = int_or_none(ytcfg.get('STS'))
3068
3069 if not sts:
3070 # Attempt to extract from player
3071 if player_url is None:
3072 error_msg = 'Cannot extract signature timestamp without player_url.'
3073 if fatal:
3074 raise ExtractorError(error_msg)
3075 self.report_warning(error_msg)
3076 return
404f611f 3077 code = self._load_player(video_id, player_url, fatal=fatal)
3078 if code:
109dd3b2 3079 sts = int_or_none(self._search_regex(
3080 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3081 'JS player signature timestamp', group='sts', fatal=fatal))
3082 return sts
3083
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs found in the player responses.

    Two URLs are requested in turn: 'videostatsPlaybackUrl' and then
    'videostatsWatchtimeUrl' (the "fully watched" one). Processing stops with
    a warning as soon as one of them is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
        cpn = ''.join(cpn_chars)

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3124
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_results for YouTube videos referenced by a generic webpage.

    An Invidious-style alternate link takes priority: it is yielded alone and
    extraction is stopped. Otherwise the parent class results are yielded,
    followed by lazyYT embeds and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link is not None:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for player_div in re.finditer(importer_re, webpage):
        # Group 3 is the unnamed data-video_id value (groups 1 and 2 are the quotes)
        embedded_id = player_div.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 3148
@classmethod
def extract_id(cls, url):
    """Return the video id for url, raising ExtractorError if none is found."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3155
def _extract_chapters_from_json(self, data, duration):
    """
    Extract chapters from the player bar renderer of the given data.

    Start times are read from 'timeRangeStartMillis' (scaled by 1000) and
    titles from the chapter's 'simpleText' title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters, chapter_time=start_seconds, chapter_title=title_of, duration=duration)
3170
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels.

    Each markers list is tried in order; the first one that yields a
    non-empty chapter list wins. Returns [] when none does.
    """
    def marker_time(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def marker_title(marker):
        return self._get_text(marker, 'title')

    markers_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    for contents in markers_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, marker_time, marker_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3183
def _extract_chapters_from_description(self, description, duration):
    """
    Extract chapters from timestamp lines in the video description.

    Lines of the form "<time> <title>" are tried first; if that produces no
    chapters, lines of the form "<title> <time>" are tried instead.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda pair: parse_duration(pair[1]),
        chapter_title=lambda pair: pair[0],
        duration=duration, strict=False)
84213ea8 3194
1890fc63 3195 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3196 if not duration:
3197 return
3198 chapter_list = [{
3199 'start_time': chapter_time(chapter),
3200 'title': chapter_title(chapter),
3201 } for chapter in chapter_list or []]
3202 if not strict:
3203 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3204
a3976e07 3205 chapters = [{'start_time': 0}]
1890fc63 3206 for idx, chapter in enumerate(chapter_list):
a3976e07 3207 if chapter['start_time'] is None:
1890fc63 3208 self.report_warning(f'Incomplete chapter {idx}')
3209 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 3210 chapters.append(chapter)
709ee214 3211 elif chapter not in chapters:
3212 self.report_warning(
3213 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
a3976e07 3214 return chapters[1:]
84213ea8 3215
a1c5d2ca
M
3216 def _extract_comment(self, comment_renderer, parent=None):
3217 comment_id = comment_renderer.get('commentId')
3218 if not comment_id:
3219 return
fe93e2c4 3220
052e1350 3221 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 3222
c26f9b99 3223 # Timestamp is an estimate calculated from the current time and time_text
3224 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3225 timestamp = self._parse_time_text(time_text)
3226
052e1350 3227 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 3228 author_id = try_get(comment_renderer,
14f25df2 3229 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 3230
49bd8c66 3231 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 3232 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 3233 author_thumbnail = try_get(comment_renderer,
14f25df2 3234 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
3235
3236 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 3237 is_favorited = 'creatorHeart' in (try_get(
3238 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
3239 return {
3240 'id': comment_id,
3241 'text': text,
d92f5d5a 3242 'timestamp': timestamp,
a1c5d2ca
M
3243 'time_text': time_text,
3244 'like_count': votes,
97524332 3245 'is_favorited': is_favorited,
a1c5d2ca
M
3246 'author': author,
3247 'author_id': author_id,
3248 'author_thumbnail': author_thumbnail,
3249 'author_is_uploader': author_is_uploader,
3250 'parent': parent or 'root'
3251 }
3252
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator yielding comment dicts, following continuations page by page.

    Calls itself recursively for reply threads, passing the id of the parent
    comment as `parent` and sharing the same `tracker` dict of counters.
    Raises self.CommentsDisabled if nothing was extracted for a top-level
    call and the root data carries a message.
    """

    # First value of an extractor argument, or '' when it is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the estimated comment count and returns the continuation
        # for the requested sort order (None if there is none)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consuming loop below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The list is padded with '' so that missing limits fall back to sys.maxsize.
    # NOTE: max_comments is unpacked here but not referenced again in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        # visitor_data is taken from the previous response (None on the first request)
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset; the loop ends unless one of the sections provides the next continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is treated as the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread yielded None: the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3409
3410 @staticmethod
3411 def _generate_comment_continuation(video_id):
3412 """
3413 Generates initial comment section continuation token from given video id
3414 """
3415 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3416 return base64.b64encode(token.encode()).decode()
3417
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    renderer in contents, limited to the first value of the 'max_comments'
    extractor argument.
    """
    def iter_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)
a1c5d2ca 3428
109dd3b2 3429 @staticmethod
99e9e001 3430 def _get_checkok_params():
3431 return {'contentCheckOk': True, 'racyCheckOk': True}
3432
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the player request context.

    The signature timestamp is included only when sts is not None. The
    result also carries the parameters from _get_checkok_params().
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3446
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age gate.

    True if playabilityStatus has a 'desktopLegacyAgeGateReason', or if its
    'status' or 'reason' contains one of the known age gate strings.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3458
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3462
50ac0e54 3463 _STORY_PLAYER_PARAMS = '8AEB'
3464
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the 'player' endpoint for video_id using the given client.

    The signature timestamp is only looked up when player_url is given.
    Returns the response, or None if it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    # Stories and the android client are requested with the story player params
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3486
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients to use for the given URL.

    Clients come from the 'player_client' extractor argument. 'default'
    expands to the default clients and 'all' to every client whose name does
    not start with '_', ordered by descending priority. Unsupported names are
    skipped with a warning. For music URLs, the '_music' variant of each
    requested client is appended when such a client exists.

    @returns  The requested clients with duplicates removed (via orderedSet)
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the defaults are never mutated through an alias
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants first; extending a list from a generator that
        # iterates the same list would also visit the newly added items
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3510
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for video_id from each of the given clients.

    Further clients may be queued while processing, depending on the
    playability status of a response. Errors of individual clients are
    reported as warnings; the last error is raised only if no player
    response was collected at all.

    Returns a tuple (list of player responses, player_url).
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below takes them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when there is one
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3592
4d37720a
L
3593 def _needs_live_processing(self, live_status, duration):
3594 if (live_status == 'is_live' and self.get_param('live_from_start')
3595 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3596 return live_status
3597
3598 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
a4894d3e 3599 itags, stream_ids = collections.defaultdict(set), []
b25cac65 3600 itag_qualities, res_qualities = {}, {0: None}
d3fc8074 3601 q = qualities([
2a9c6dcd 3602 # Normally tiny is the smallest video-only formats. But
3603 # audio-only formats with unknown quality may get tagged as tiny
3604 'tiny',
3605 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
d3fc8074 3606 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3607 ])
6839ae1f 3608 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
9297939e 3609
545cc85d 3610 for fmt in streaming_formats:
727029c5 3611 if fmt.get('targetDurationSec'):
545cc85d 3612 continue
321bf820 3613
cc2db878 3614 itag = str_or_none(fmt.get('itag'))
9297939e 3615 audio_track = fmt.get('audioTrack') or {}
9bb85699 3616 stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
9297939e 3617 if stream_id in stream_ids:
3618 continue
3619
cc2db878 3620 quality = fmt.get('quality')
2a9c6dcd 3621 height = int_or_none(fmt.get('height'))
d3fc8074 3622 if quality == 'tiny' or not quality:
3623 quality = fmt.get('audioQuality', '').lower() or quality
2a9c6dcd 3624 # The 3gp format (17) in android client has a quality of "small",
3625 # but is actually worse than other formats
3626 if itag == '17':
3627 quality = 'tiny'
3628 if quality:
3629 if itag:
3630 itag_qualities[itag] = quality
3631 if height:
3632 res_qualities[height] = quality
cc2db878 3633 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3634 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3635 # number of fragment that would subsequently requested with (`&sq=N`)
3636 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3637 continue
3638
545cc85d 3639 fmt_url = fmt.get('url')
3640 if not fmt_url:
14f25df2 3641 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
545cc85d 3642 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3643 encrypted_sig = try_get(sc, lambda x: x['s'][0])
52023f12 3644 if not all((sc, fmt_url, player_url, encrypted_sig)):
545cc85d 3645 continue
52023f12 3646 try:
3647 fmt_url += '&%s=%s' % (
3648 traverse_obj(sc, ('sp', -1)) or 'signature',
3649 self._decrypt_signature(encrypted_sig, video_id, player_url)
3650 )
3651 except ExtractorError as e:
580ce007 3652 self.report_warning('Signature extraction failed: Some formats may be missing',
3653 video_id=video_id, only_once=True)
52023f12 3654 self.write_debug(e, only_once=True)
201e9eaa 3655 continue
545cc85d 3656
404f611f 3657 query = parse_qs(fmt_url)
3658 throttled = False
b2916526 3659 if query.get('n'):
404f611f 3660 try:
580ce007 3661 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
404f611f 3662 fmt_url = update_url_query(fmt_url, {
580ce007 3663 'n': decrypt_nsig(query['n'][0], video_id, player_url)
3664 })
404f611f 3665 except ExtractorError as e:
25836db6 3666 phantomjs_hint = ''
3667 if isinstance(e, JSInterpreter.Exception):
d81ba7d4 3668 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3669 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
17ffed18 3670 if player_url:
3671 self.report_warning(
3672 f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
3673 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3674 self.write_debug(e, only_once=True)
3675 else:
3676 self.report_warning(
3677 'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
3678 video_id=video_id, only_once=True)
404f611f 3679 throttled = True
3680
0ad92dfb 3681 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
ab6df717 3682 language_preference = (
3683 10 if audio_track.get('audioIsDefault') and 10
3684 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3685 else -1)
0ad92dfb 3686 # Some formats may have much smaller duration than others (possibly damaged during encoding)
62b58c09 3687 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
a1b2d843 3688 # Make sure to avoid false positives with small duration differences.
62b58c09 3689 # E.g. __2ABJjxzNo, ySuUZEjARPY
a1b2d843 3690 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
08d30158 3691 if is_damaged:
0f06bcd7 3692 self.report_warning(
3693 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
545cc85d 3694 dct = {
3695 'asr': int_or_none(fmt.get('audioSampleRate')),
3696 'filesize': int_or_none(fmt.get('contentLength')),
9bb85699 3697 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
34921b43 3698 'format_note': join_nonempty(
26e8e044 3699 '%s%s' % (audio_track.get('displayName') or '',
ab6df717 3700 ' (default)' if language_preference > 0 else ''),
404f611f 3701 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
9bb85699 3702 'DRC' if fmt.get('isDrc') else None,
a4166234 3703 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3704 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
0ad92dfb 3705 throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
91e5e839 3706 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3707 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
a4211baf 3708 'fps': int_or_none(fmt.get('fps')) or None,
a4166234 3709 'audio_channels': fmt.get('audioChannels'),
2a9c6dcd 3710 'height': height,
9bb85699 3711 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
727029c5 3712 'has_drm': bool(fmt.get('drmFamilies')),
cc2db878 3713 'tbr': tbr,
545cc85d 3714 'url': fmt_url,
2a9c6dcd 3715 'width': int_or_none(fmt.get('width')),
ab6df717 3716 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
08e29b9f 3717 'desc' if language_preference < -1 else '') or None,
ab6df717 3718 'language_preference': language_preference,
a405b38f 3719 # Strictly de-prioritize damaged and 3gp formats
3720 'preference': -10 if is_damaged else -2 if itag == '17' else None,
545cc85d 3721 }
60bdb7bd 3722 mime_mobj = re.match(
3723 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3724 if mime_mobj:
3725 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3726 dct.update(parse_codecs(mime_mobj.group(2)))
cc2db878 3727 no_audio = dct.get('acodec') == 'none'
3728 no_video = dct.get('vcodec') == 'none'
3729 if no_audio:
3730 dct['vbr'] = tbr
3731 if no_video:
3732 dct['abr'] = tbr
3733 if no_audio or no_video:
545cc85d 3734 dct['downloader_options'] = {
3735 # Youtube throttles chunks >~10M
3736 'http_chunk_size': 10485760,
bf1317d2 3737 }
7c60c33e 3738 if dct.get('ext'):
3739 dct['container'] = dct['ext'] + '_dash'
a4894d3e 3740
3741 if itag:
3742 itags[itag].add(('https', dct.get('language')))
3743 stream_ids.append(stream_id)
11f9be09 3744 yield dct
545cc85d 3745
4d37720a
L
3746 needs_live_processing = self._needs_live_processing(live_status, duration)
3747 skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
3748
3749 skip_manifests = set(self._configuration_arg('skip'))
3750 if (not self.get_param('youtube_include_hls_manifest', True)
3751 or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
3752 or needs_live_processing and skip_bad_formats):
3753 skip_manifests.add('hls')
3754
0f06bcd7 3755 if not self.get_param('youtube_include_dash_manifest', True):
4d37720a
L
3756 skip_manifests.add('dash')
3757 if self._configuration_arg('include_live_dash'):
3758 self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
3759 'Use include_incomplete_formats extractor argument instead')
3760 elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
3761 skip_manifests.add('dash')
5d3a0e79 3762
a0bb6ce5 3763 def process_manifest_format(f, proto, itag):
a4894d3e 3764 key = (proto, f.get('language'))
3765 if key in itags[itag]:
3766 return False
3767 itags[itag].add(key)
3768
3769 if any(p != proto for p, _ in itags[itag]):
3770 f['format_id'] = f'{itag}-{proto}'
3771 elif itag:
a0bb6ce5 3772 f['format_id'] = itag
a0bb6ce5 3773
b25cac65 3774 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
5c6d2ef9 3775 if f['quality'] == -1 and f.get('height'):
3776 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
a0bb6ce5 3777 return True
2a9c6dcd 3778
c646d76f 3779 subtitles = {}
11f9be09 3780 for sd in streaming_data:
4d37720a 3781 hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
9297939e 3782 if hls_manifest_url:
4d37720a
L
3783 fmts, subs = self._extract_m3u8_formats_and_subtitles(
3784 hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
c646d76f 3785 subtitles = self._merge_subtitles(subs, subtitles)
3786 for f in fmts:
a0bb6ce5 3787 if process_manifest_format(f, 'hls', self._search_regex(
3788 r'/itag/(\d+)', f['url'], 'itag', default=None)):
3789 yield f
545cc85d 3790
4d37720a 3791 dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
5d3a0e79 3792 if dash_manifest_url:
c646d76f 3793 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
3794 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
3795 for f in formats:
a0bb6ce5 3796 if process_manifest_format(f, 'dash', f['format_id']):
3797 f['filesize'] = int_or_none(self._search_regex(
3798 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
4d37720a 3799 if needs_live_processing:
adbc4ec4
THD
3800 f['is_from_start'] = True
3801
a0bb6ce5 3802 yield f
c646d76f 3803 yield subtitles
11f9be09 3804
def _extract_storyboard(self, player_responses, duration):
    """Yield one ``mhtml`` storyboard format dict per storyboard level.

    The spec string is read from the first player response that carries
    ``storyboards.playerStoryboardSpecRenderer.spec``. It is split on ``|``:
    the first field is the URL template, every further field describes one
    level as eight ``#``-separated values.

    @param player_responses  Player response dicts to search for the spec
    @param duration          Video duration; used as a divisor for the
                             per-fragment duration and the fps value
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed so that the URL template (the first field) can be popped off the end
    levels = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    last_index = len(levels) - 1
    for index, level in enumerate(levels):
        fields = level.split('#')
        numbers = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(numbers):
            self.report_warning(f'Malformed storyboard {index}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = numbers
        name_pattern, signature = fields[6:]

        # $L and $N are filled in here; $M is substituted per fragment below
        level_url = (
            base_url.replace('$L', str(last_index - index)).replace('$N', name_pattern)
            + f'&sigh={signature}')
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{index}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': level_url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': level_url.replace('$M', str(sheet)),
                # The last sheet may cover less time than a full one
                'duration': min(fragment_duration, duration - (sheet * fragment_duration)),
            } for sheet in range(math.ceil(fragment_count))],
        }
3842
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The watch page download is skipped when ``webpage`` is listed in the
    ``player_skip`` extractor argument; ``webpage`` is then ``None``.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    webpage = None
    wants_webpage = 'webpage' not in self._configuration_arg('player_skip')
    if wants_webpage:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3859
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and collect formats and manifest subtitles.

    @returns  (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching state wins; the order of the checks is significant
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
3878
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Returns early with a ``url_result`` when the player responses point at a
    trailer video, and with a ``playlist_result`` for multifeed videos unless
    ``noplaylist`` is set or ``force_singlefeed`` was smuggled in. Otherwise
    builds and returns the full info dict (formats, thumbnails, subtitles,
    metadata from the player responses, the watch page and the "next" data).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(
        url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage every meta lookup simply yields None
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None here; appending to it directly would raise TypeError
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if the user doesn't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for throttled/potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Looked up only once a base URL is known to exist
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Pick up start/end times from the URL fragment first, then from the query string
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name literal)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows means several songs are listed;
        # album/artist/track are then left unset
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 4387
a61fd4cf 4388
a6213a49 4389class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled data to the URLs an extractor returns.

    The wrapped ``func`` is called as ``func(self, url, smuggled_data)`` with
    the smuggled data already removed from ``url``. Whatever is left in
    ``smuggled_data`` afterwards is smuggled back into the returned
    ``url``/``url_transparent`` result and into each of its ``entries``.
    """
    music_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in music_hosts:
                    # The music flag is expressed through the hostname instead
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Lazy on purpose: each entry gets its own copy of the data when consumed
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result
    return wrapper
4416
def _extract_channel_id(self, webpage):
    """Return the channel ID found in *webpage*.

    The ``channelId`` meta tag is preferred. When it is absent, the ID is
    taken from the ``/channel/<id>`` part of the first channel URL found in
    the page's og/al/twitter URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the capture group: a repeated group
    # only keeps its last iteration, i.e. the final character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4429
8bdd16b4 4430 @staticmethod
cd7c66cf 4431 def _extract_basic_item_renderer(item):
4432 # Modified from _extract_grid_item_renderer
201c1459 4433 known_basic_renderers = (
a17526e4 4434 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4435 )
4436 for key, renderer in item.items():
201c1459 4437 if not isinstance(renderer, dict):
cd7c66cf 4438 continue
201c1459 4439 elif key in known_basic_renderers:
4440 return renderer
4441 elif key.startswith('grid') and key.endswith('Renderer'):
4442 return renderer
8bdd16b4 4443
c7335551
M
def _extract_channel_renderer(self, renderer):
    """Build a ``url`` result pointing at the channel described by *renderer*.

    *renderer* must contain ``channelId``; the remaining fields are read via
    the text/count/thumbnail helpers.
    """
    channel_id = renderer['channelId']
    title = self._get_text(renderer, 'title')
    channel_url = f'https://www.youtube.com/channel/{channel_id}'

    result = {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
    }
    # Channel identity is duplicated into the channel_* fields
    result.update({
        'channel': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
    })
    result.update({
        'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': self._get_count(renderer, 'videoCountText'),
        'description': self._get_text(renderer, 'descriptionSnippet'),
    })
    return result
4462
8bdd16b4 4463 def _grid_entries(self, grid_renderer):
4464 for item in grid_renderer['items']:
4465 if not isinstance(item, dict):
39b62db1 4466 continue
cd7c66cf 4467 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4468 if not isinstance(renderer, dict):
4469 continue
052e1350 4470 title = self._get_text(renderer, 'title')
fe93e2c4 4471
8bdd16b4 4472 # playlist
4473 playlist_id = renderer.get('playlistId')
4474 if playlist_id:
4475 yield self.url_result(
4476 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4477 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4478 video_title=title)
201c1459 4479 continue
8bdd16b4 4480 # video
4481 video_id = renderer.get('videoId')
4482 if video_id:
4483 yield self._extract_video(renderer)
201c1459 4484 continue
8bdd16b4 4485 # channel
4486 channel_id = renderer.get('channelId')
4487 if channel_id:
c7335551 4488 yield self._extract_channel_renderer(renderer)
201c1459 4489 continue
4490 # generic endpoint URL support
4491 ep_url = urljoin('https://www.youtube.com/', try_get(
4492 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4493 str))
201c1459 4494 if ep_url:
4495 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4496 if ie.suitable(ep_url):
4497 yield self.url_result(
4498 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4499 break
8bdd16b4 4500
def _music_reponsive_list_entry(self, renderer):
    """Return a URL result for a music list item, or None if nothing matches.

    Checked in order: a direct video ID, a watch endpoint carrying a playlist
    (optionally with a video), and finally a browse endpoint.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        playlist_url = (
            f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}' if video_id
            else f'https://music.youtube.com/playlist?list={playlist_id}')
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4518
3d3dddc9 4519 def _shelf_entries_from_content(self, shelf_renderer):
4520 content = shelf_renderer.get('content')
4521 if not isinstance(content, dict):
8bdd16b4 4522 return
cd7c66cf 4523 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4524 if renderer:
4525 # TODO: add support for nested playlists so each shelf is processed
4526 # as separate playlist
4527 # TODO: this includes only first N items
86e5f3ed 4528 yield from self._grid_entries(renderer)
3d3dddc9 4529 renderer = content.get('horizontalListRenderer')
4530 if renderer:
4531 # TODO
4532 pass
8bdd16b4 4533
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's own content.

    With *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    def _endpoint_url(shelf):
        return shelf['endpoint']['commandMetadata']['webCommandMetadata']['url']

    shelf_url = urljoin('https://www.youtube.com', try_get(shelf_renderer, _endpoint_url, str))
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4549
8bdd16b4 4550 def _playlist_entries(self, video_list_renderer):
4551 for content in video_list_renderer['contents']:
4552 if not isinstance(content, dict):
4553 continue
4554 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4555 if not isinstance(renderer, dict):
4556 continue
4557 video_id = renderer.get('videoId')
4558 if not video_id:
4559 continue
4560 yield self._extract_video(renderer)
07aeced6 4561
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content``, if it has a ``videoId``."""
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4569
8bdd16b4 4570 def _video_entry(self, video_renderer):
4571 video_id = video_renderer.get('videoId')
4572 if video_id:
4573 return self._extract_video(video_renderer)
dacb3a86 4574
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4581
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    link in the post text other than the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4617
8bdd16b4 4618 def _post_thread_continuation_entries(self, post_thread_continuation):
4619 contents = post_thread_continuation.get('contents')
4620 if not isinstance(contents, list):
4621 return
4622 for content in contents:
4623 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4624 if isinstance(renderer, dict):
4625 yield from self._post_thread_entries(renderer)
8bdd16b4 4626 continue
6b0b0a28 4627 renderer = content.get('videoRenderer')
4628 if isinstance(renderer, dict):
4629 yield self._video_entry(renderer)
07aeced6 4630
39ed931e 4631 r''' # unused
4632 def _rich_grid_entries(self, contents):
4633 for content in contents:
4634 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4635 if video_renderer:
4636 entry = self._video_entry(video_renderer)
4637 if entry:
4638 yield entry
4639 '''
52efa4b3 4640
def _report_history_entries(self, renderer):
    """Yield a YouTube URL result for every link found in the report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4647
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries under ``parent_renderer['contents']``.

    *continuation_list* is an out-parameter: it is reset to ``[None]`` up
    front and its single slot receives the continuation found while walking
    the renderers (the most specific renderer first, then the section, then
    *parent_renderer*).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    renderer_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                handler = renderer_handlers.get(key)
                if handler is None:
                    continue
                yield from filter(None, handler(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4700
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of *tab*, following continuations page by page.

    The first page comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``. Later pages are requested with the current
    continuation until none is left, a request returns nothing, or a page
    contains no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads whichever list `continuation_list` is bound to at call time
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # key of the first continuation item -> (extractor, key to wrap the items in, if any)
    continuation_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            if key not in continuation_handlers:
                continue
            handler, parent_key = continuation_handlers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
9558dcec 4763
8bdd16b4 4764 @staticmethod
7c219ea6 4765 def _extract_selected_tab(tabs, fatal=True):
86973308
M
4766 for tab_renderer in tabs:
4767 if tab_renderer.get('selected'):
4768 return tab_renderer
4769 if fatal:
4770 raise ExtractorError('Unable to find selected tab')
4771
@staticmethod
def _extract_tab_renderers(response):
    """Collect the (expandable) tab renderer dicts of a two-column browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
b82f815f 4776
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of *tabs*.

    The playlist title is the tab metadata title suffixed with the selected
    tab's ``title`` and ``expandedText`` when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
39ed931e 4790
def _extract_metadata_from_tabs(self, item_id, data):
    """Build the playlist-level info dict for a channel or playlist page.

    Reads the channel (or, failing that, playlist) metadata renderer plus the
    header/sidebar renderers of *data*. The returned dict always contains
    ``id``, ``title`` and the uploader/channel fields (possibly None).
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derives the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    title = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info.update({
        'title': title,
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel_* fields mirror the uploader_* fields
    for suffix in ('', '_id', '_url'):
        info[f'channel{suffix}'] = info[f'uploader{suffix}']
    return info
73c4ac2c 4893
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, paging via the ``next`` endpoint.

    Each page is requested from the last video seen. Extraction stops when a
    page is missing or has no contents, yields no videos, or contains
    nothing after the last video already yielded.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # The follow-up response may lack the playlist renderer altogether
        if not isinstance(playlist, dict) or not playlist.get('contents'):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer;
        # fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4924
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a watch-page playlist.

    When the playlist links to a different playlist page that is not known to
    be unviewable, a URL result to that page is returned; otherwise the
    videos are extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if should_delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
    return self.playlist_result(inline_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 4947
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Sources, in order of precedence: sidebar badges, the playlist header
    ``privacy`` field, the selected entry of the privacy dropdown and, for
    unlisted only, the microformat flag.
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _privacy_flag(badge_type, header_value, icon_value, fallback=None):
        # A badge always counts. Previously `badge or header == X if header is not None else ...`
        # parsed as `(badge or header == X) if ... else ...`, which discarded the
        # badge whenever the header privacy field was absent
        badge_match = self._has_badge(badges, badge_type)
        if badge_match:
            return badge_match
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    return self._availability(
        is_private=_privacy_flag(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=_privacy_flag(
            BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED',
            fallback=microformats_is_unlisted),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
47193e02 4988
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* among the playlist sidebar items, or None."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type) for item in sidebar_items)
    return next(filter(None, candidates), None)
4997
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when *data* has neither playlist
    metadata nor a playlist header. The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not bool(playlist_marker):
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}'
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 5017
@functools.cached_property
def skip_webpage(self):
    """Whether ``webpage`` is listed in the ``skip`` extractor argument of YoutubeTabIE."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
5021
def _extract_webpage(self, url, item_id, fatal=True):
    """Download *url* and return ``(webpage, initial data)``, retrying when sensible.

    Network errors other than HTTP 403/429 and incomplete initial data are
    handed to the retry manager. Any other extraction error, or an alert
    reported by the page, ends the attempts and is raised or warned about
    depending on *fatal*.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if is_retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
5049
a25bca9f 5050 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5051 """Use if failed to extract ytcfg (and data) from initial webpage"""
5052 if not ytcfg and self.is_authenticated:
5053 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5054 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5055 raise ExtractorError(
5056 f'{msg}. If you are not downloading private content, or '
5057 'your cookies are only for the first account and channel,'
5058 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5059 expected=True)
5060 self.report_warning(msg, only_once=True)
5061
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for *url*.

    Unless the webpage is skipped, data comes from the webpage first; the
    API endpoint is used when that yields nothing. A redirect to the home
    page that was not explicitly requested is an error (or a warning when not
    *fatal*).
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5080
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve *url* through the API and return the response of the endpoint it maps to.

    Raises ExtractorError when the URL resolves to neither a browse nor a
    watch endpoint and *fatal* is set; otherwise warns and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # resolved endpoint key -> API endpoint to query with its parameters
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5098
a6213a49 5099 _SEARCH_PARAMS = None
5100
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search results for *query*, following continuations until none is left.

    *params* defaults to the class-level ``_SEARCH_PARAMS`` and is only sent
    when truthy.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # The continuation of the previous page (if any) is merged into the request
        payload.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
5133
5134
5135class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5136 IE_DESC = 'YouTube Tabs'
5137 _VALID_URL = r'''(?x:
5138 https?://
b032ff0f 5139 (?!consent\.)(?:\w+\.)?
a6213a49 5140 (?:
5141 youtube(?:kids)?\.com|
5142 %(invidious)s
5143 )/
5144 (?:
5145 (?P<channel_type>channel|c|user|browse)/|
5146 (?P<not_channel>
5147 feed/|hashtag/|
5148 (?:playlist|watch)\?.*?\blist=
5149 )|
5150 (?!(?:%(reserved_names)s)\b) # Direct URLs
5151 )
5152 (?P<id>[^/?\#&]+)
5153 )''' % {
5154 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5155 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5156 }
5157 IE_NAME = 'youtube:tab'
5158
5159 _TESTS = [{
5160 'note': 'playlists, multipage',
5161 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5162 'playlist_mincount': 94,
5163 'info_dict': {
5164 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5165 'title': 'Igor Kleiner - Playlists',
a6213a49 5166 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5167 'uploader': 'Igor Kleiner',
a6213a49 5168 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5169 'channel': 'Igor Kleiner',
5170 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5171 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5172 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5173 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5174 'channel_follower_count': int
a6213a49 5175 },
5176 }, {
5177 'note': 'playlists, multipage, different order',
5178 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5179 'playlist_mincount': 94,
5180 'info_dict': {
5181 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5182 'title': 'Igor Kleiner - Playlists',
a6213a49 5183 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5184 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5185 'uploader': 'Igor Kleiner',
5186 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5187 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5188 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5189 'channel': 'Igor Kleiner',
5190 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5191 'channel_follower_count': int
a6213a49 5192 },
5193 }, {
5194 'note': 'playlists, series',
5195 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5196 'playlist_mincount': 5,
5197 'info_dict': {
5198 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5199 'title': '3Blue1Brown - Playlists',
5200 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5201 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5202 'uploader': '3Blue1Brown',
976ae3ea 5203 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5204 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5205 'channel': '3Blue1Brown',
5206 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5207 'tags': ['Mathematics'],
6c73052c 5208 'channel_follower_count': int
a6213a49 5209 },
5210 }, {
5211 'note': 'playlists, singlepage',
5212 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5213 'playlist_mincount': 4,
5214 'info_dict': {
5215 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5216 'title': 'ThirstForScience - Playlists',
5217 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5218 'uploader': 'ThirstForScience',
5219 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5220 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5221 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5222 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5223 'tags': 'count:13',
5224 'channel': 'ThirstForScience',
6c73052c 5225 'channel_follower_count': int
a6213a49 5226 }
5227 }, {
5228 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5229 'only_matching': True,
5230 }, {
5231 'note': 'basic, single video playlist',
5232 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5233 'info_dict': {
5234 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5235 'uploader': 'Sergey M.',
5236 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5237 'title': 'youtube-dl public playlist',
976ae3ea 5238 'description': '',
5239 'tags': [],
5240 'view_count': int,
5241 'modified_date': '20201130',
5242 'channel': 'Sergey M.',
5243 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5244 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5245 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5246 'availability': 'public',
a6213a49 5247 },
5248 'playlist_count': 1,
5249 }, {
5250 'note': 'empty playlist',
5251 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5252 'info_dict': {
5253 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5254 'uploader': 'Sergey M.',
5255 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5256 'title': 'youtube-dl empty playlist',
976ae3ea 5257 'tags': [],
5258 'channel': 'Sergey M.',
5259 'description': '',
5260 'modified_date': '20160902',
5261 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5262 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5263 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5264 'availability': 'public',
a6213a49 5265 },
5266 'playlist_count': 0,
5267 }, {
5268 'note': 'Home tab',
5269 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5270 'info_dict': {
5271 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5272 'title': 'lex will - Home',
5273 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5274 'uploader': 'lex will',
5275 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5276 'channel': 'lex will',
5277 'tags': ['bible', 'history', 'prophesy'],
5278 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5279 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5280 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5281 'channel_follower_count': int
a6213a49 5282 },
5283 'playlist_mincount': 2,
5284 }, {
5285 'note': 'Videos tab',
5286 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5287 'info_dict': {
5288 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5289 'title': 'lex will - Videos',
5290 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5291 'uploader': 'lex will',
5292 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5293 'tags': ['bible', 'history', 'prophesy'],
5294 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5295 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5296 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5297 'channel': 'lex will',
6c73052c 5298 'channel_follower_count': int
a6213a49 5299 },
5300 'playlist_mincount': 975,
5301 }, {
5302 'note': 'Videos tab, sorted by popular',
5303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5304 'info_dict': {
5305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5306 'title': 'lex will - Videos',
5307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5308 'uploader': 'lex will',
5309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5310 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5311 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5312 'channel': 'lex will',
5313 'tags': ['bible', 'history', 'prophesy'],
5314 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5315 'channel_follower_count': int
a6213a49 5316 },
5317 'playlist_mincount': 199,
5318 }, {
5319 'note': 'Playlists tab',
5320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5321 'info_dict': {
5322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5323 'title': 'lex will - Playlists',
5324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5325 'uploader': 'lex will',
5326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5327 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5328 'channel': 'lex will',
5329 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5330 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5331 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5332 'channel_follower_count': int
a6213a49 5333 },
5334 'playlist_mincount': 17,
5335 }, {
5336 'note': 'Community tab',
5337 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5338 'info_dict': {
5339 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5340 'title': 'lex will - Community',
5341 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5342 'uploader': 'lex will',
5343 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5344 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5345 'channel': 'lex will',
5346 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5347 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5348 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5349 'channel_follower_count': int
a6213a49 5350 },
5351 'playlist_mincount': 18,
5352 }, {
5353 'note': 'Channels tab',
5354 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5355 'info_dict': {
5356 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5357 'title': 'lex will - Channels',
5358 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5359 'uploader': 'lex will',
5360 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5361 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5362 'channel': 'lex will',
5363 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5364 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5365 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5366 'channel_follower_count': int
a6213a49 5367 },
5368 'playlist_mincount': 12,
5369 }, {
5370 'note': 'Search tab',
5371 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5372 'playlist_mincount': 40,
5373 'info_dict': {
5374 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5375 'title': '3Blue1Brown - Search - linear algebra',
5376 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5377 'uploader': '3Blue1Brown',
5378 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5379 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5380 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5381 'tags': ['Mathematics'],
5382 'channel': '3Blue1Brown',
5383 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 5384 'channel_follower_count': int
a6213a49 5385 },
5386 }, {
5387 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5388 'only_matching': True,
5389 }, {
5390 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5391 'only_matching': True,
5392 }, {
5393 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5394 'only_matching': True,
5395 }, {
5396 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5397 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5398 'info_dict': {
5399 'title': '29C3: Not my department',
5400 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5401 'uploader': 'Christiaan008',
5402 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5403 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5404 'tags': [],
5405 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5406 'view_count': int,
5407 'modified_date': '20150605',
5408 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5409 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5410 'channel': 'Christiaan008',
c26f9b99 5411 'availability': 'public',
a6213a49 5412 },
5413 'playlist_count': 96,
5414 }, {
5415 'note': 'Large playlist',
5416 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5417 'info_dict': {
5418 'title': 'Uploads from Cauchemar',
5419 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5420 'uploader': 'Cauchemar',
5421 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5422 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5423 'tags': [],
5424 'modified_date': r're:\d{8}',
5425 'channel': 'Cauchemar',
5426 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5427 'view_count': int,
5428 'description': '',
5429 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5430 'availability': 'public',
a6213a49 5431 },
5432 'playlist_mincount': 1123,
976ae3ea 5433 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5434 }, {
5435 'note': 'even larger playlist, 8832 videos',
5436 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5437 'only_matching': True,
5438 }, {
5439 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5440 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5441 'info_dict': {
5442 'title': 'Uploads from Interstellar Movie',
5443 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5444 'uploader': 'Interstellar Movie',
5445 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5446 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5447 'tags': [],
5448 'view_count': int,
5449 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5450 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5451 'channel': 'Interstellar Movie',
5452 'description': '',
5453 'modified_date': r're:\d{8}',
c26f9b99 5454 'availability': 'public',
a6213a49 5455 },
5456 'playlist_mincount': 21,
5457 }, {
5458 'note': 'Playlist with "show unavailable videos" button',
5459 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5460 'info_dict': {
5461 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5462 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5463 'uploader': 'Phim Siêu Nhân Nhật Bản',
5464 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5465 'view_count': int,
5466 'channel': 'Phim Siêu Nhân Nhật Bản',
5467 'tags': [],
5468 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5469 'description': '',
5470 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5471 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5472 'modified_date': r're:\d{8}',
c26f9b99 5473 'availability': 'public',
a6213a49 5474 },
5475 'playlist_mincount': 200,
976ae3ea 5476 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5477 }, {
5478 'note': 'Playlist with unavailable videos in page 7',
5479 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5480 'info_dict': {
5481 'title': 'Uploads from BlankTV',
5482 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5483 'uploader': 'BlankTV',
5484 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5485 'channel': 'BlankTV',
5486 'channel_url': 'https://www.youtube.com/c/blanktv',
5487 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5488 'view_count': int,
5489 'tags': [],
5490 'uploader_url': 'https://www.youtube.com/c/blanktv',
5491 'modified_date': r're:\d{8}',
5492 'description': '',
c26f9b99 5493 'availability': 'public',
a6213a49 5494 },
5495 'playlist_mincount': 1000,
976ae3ea 5496 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5497 }, {
5498 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5499 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5500 'info_dict': {
5501 'title': 'Data Analysis with Dr Mike Pound',
5502 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5503 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5504 'uploader': 'Computerphile',
5505 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5506 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5507 'tags': [],
5508 'view_count': int,
5509 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5510 'channel_url': 'https://www.youtube.com/user/Computerphile',
5511 'channel': 'Computerphile',
c26f9b99 5512 'availability': 'public',
6141346d 5513 'modified_date': '20190712',
a6213a49 5514 },
5515 'playlist_mincount': 11,
5516 }, {
5517 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5518 'only_matching': True,
5519 }, {
5520 'note': 'Playlist URL that does not actually serve a playlist',
5521 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5522 'info_dict': {
5523 'id': 'FqZTN594JQw',
5524 'ext': 'webm',
5525 'title': "Smiley's People 01 detective, Adventure Series, Action",
5526 'uploader': 'STREEM',
5527 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5529 'upload_date': '20150526',
5530 'license': 'Standard YouTube License',
5531 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5532 'categories': ['People & Blogs'],
5533 'tags': list,
5534 'view_count': int,
5535 'like_count': int,
a6213a49 5536 },
5537 'params': {
5538 'skip_download': True,
5539 },
5540 'skip': 'This video is not available.',
5541 'add_ie': [YoutubeIE.ie_key()],
5542 }, {
5543 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5544 'only_matching': True,
5545 }, {
5546 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5547 'only_matching': True,
5548 }, {
5549 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5550 'info_dict': {
12a1b225 5551 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5552 'ext': 'mp4',
976ae3ea 5553 'title': str,
a6213a49 5554 'uploader': 'Sky News',
5555 'uploader_id': 'skynews',
5556 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5557 'upload_date': r're:\d{8}',
976ae3ea 5558 'description': str,
a6213a49 5559 'categories': ['News & Politics'],
5560 'tags': list,
5561 'like_count': int,
86973308 5562 'release_timestamp': int,
976ae3ea 5563 'channel': 'Sky News',
5564 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5565 'age_limit': 0,
5566 'view_count': int,
86973308 5567 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5568 'playable_in_embed': True,
86973308 5569 'release_date': r're:\d+',
976ae3ea 5570 'availability': 'public',
5571 'live_status': 'is_live',
5572 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5573 'channel_follower_count': int,
5574 'concurrent_view_count': int,
a6213a49 5575 },
5576 'params': {
5577 'skip_download': True,
5578 },
976ae3ea 5579 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5580 }, {
5581 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5582 'info_dict': {
5583 'id': 'a48o2S1cPoo',
5584 'ext': 'mp4',
5585 'title': 'The Young Turks - Live Main Show',
5586 'uploader': 'The Young Turks',
5587 'uploader_id': 'TheYoungTurks',
5588 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5589 'upload_date': '20150715',
5590 'license': 'Standard YouTube License',
5591 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5592 'categories': ['News & Politics'],
5593 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5594 'like_count': int,
a6213a49 5595 },
5596 'params': {
5597 'skip_download': True,
5598 },
5599 'only_matching': True,
5600 }, {
5601 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5602 'only_matching': True,
5603 }, {
5604 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5605 'only_matching': True,
5606 }, {
5607 'note': 'A channel that is not live. Should raise error',
5608 'url': 'https://www.youtube.com/user/numberphile/live',
5609 'only_matching': True,
5610 }, {
5611 'url': 'https://www.youtube.com/feed/trending',
5612 'only_matching': True,
5613 }, {
5614 'url': 'https://www.youtube.com/feed/library',
5615 'only_matching': True,
5616 }, {
5617 'url': 'https://www.youtube.com/feed/history',
5618 'only_matching': True,
5619 }, {
5620 'url': 'https://www.youtube.com/feed/subscriptions',
5621 'only_matching': True,
5622 }, {
5623 'url': 'https://www.youtube.com/feed/watch_later',
5624 'only_matching': True,
5625 }, {
5626 'note': 'Recommended - redirects to home page.',
5627 'url': 'https://www.youtube.com/feed/recommended',
5628 'only_matching': True,
5629 }, {
5630 'note': 'inline playlist with not always working continuations',
5631 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5632 'only_matching': True,
5633 }, {
5634 'url': 'https://www.youtube.com/course',
5635 'only_matching': True,
5636 }, {
5637 'url': 'https://www.youtube.com/zsecurity',
5638 'only_matching': True,
5639 }, {
5640 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5641 'only_matching': True,
5642 }, {
5643 'url': 'https://www.youtube.com/TheYoungTurks/live',
5644 'only_matching': True,
5645 }, {
5646 'url': 'https://www.youtube.com/hashtag/cctv9',
5647 'info_dict': {
5648 'id': 'cctv9',
5649 'title': '#cctv9',
976ae3ea 5650 'tags': [],
a6213a49 5651 },
4dc23a80 5652 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5653 }, {
5654 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5655 'only_matching': True,
5656 }, {
5657 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5658 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5659 'only_matching': True
5660 }, {
5661 'note': '/browse/ should redirect to /channel/',
5662 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5663 'only_matching': True
5664 }, {
5665 'note': 'VLPL, should redirect to playlist?list=PL...',
5666 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5667 'info_dict': {
5668 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5669 'uploader': 'NoCopyrightSounds',
5670 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5671 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5672 'title': 'NCS : All Releases 💿',
976ae3ea 5673 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5674 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5675 'modified_date': r're:\d{8}',
5676 'view_count': int,
5677 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5678 'tags': [],
5679 'channel': 'NoCopyrightSounds',
c26f9b99 5680 'availability': 'public',
a6213a49 5681 },
5682 'playlist_mincount': 166,
976ae3ea 5683 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5684 }, {
5685 'note': 'Topic, should redirect to playlist?list=UU...',
5686 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5687 'info_dict': {
5688 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5689 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5690 'title': 'Uploads from Royalty Free Music - Topic',
5691 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5692 'tags': [],
5693 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5694 'channel': 'Royalty Free Music - Topic',
5695 'view_count': int,
5696 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5697 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5698 'modified_date': r're:\d{8}',
5699 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5700 'description': '',
c26f9b99 5701 'availability': 'public',
a6213a49 5702 },
a6213a49 5703 'playlist_mincount': 101,
5704 }, {
86973308
M
5705 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5706 # Treat as a general feed
a6213a49 5707 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5708 'info_dict': {
5709 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5710 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5711 'tags': [],
a6213a49 5712 },
a6213a49 5713 'playlist_mincount': 9,
5714 }, {
5715 'note': 'Youtube music Album',
5716 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5717 'info_dict': {
5718 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5719 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5720 'tags': [],
5721 'view_count': int,
5722 'description': '',
5723 'availability': 'unlisted',
5724 'modified_date': r're:\d{8}',
a6213a49 5725 },
5726 'playlist_count': 50,
5727 }, {
5728 'note': 'unlisted single video playlist',
5729 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5730 'info_dict': {
5731 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5732 'uploader': 'colethedj',
5733 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5734 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5735 'availability': 'unlisted',
5736 'tags': [],
12a1b225 5737 'modified_date': '20220418',
976ae3ea 5738 'channel': 'colethedj',
5739 'view_count': int,
5740 'description': '',
5741 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5742 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5743 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5744 },
5745 'playlist_count': 1,
5746 }, {
5747 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5748 'url': 'https://www.youtube.com/feed/recommended',
5749 'info_dict': {
5750 'id': 'recommended',
5751 'title': 'recommended',
6c73052c 5752 'tags': [],
a6213a49 5753 },
5754 'playlist_mincount': 50,
5755 'params': {
5756 'skip_download': True,
5757 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5758 },
5759 }, {
5760 'note': 'API Fallback: /videos tab, sorted by oldest first',
5761 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5762 'info_dict': {
5763 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5764 'title': 'Cody\'sLab - Videos',
5765 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5766 'uploader': 'Cody\'sLab',
5767 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5768 'channel': 'Cody\'sLab',
5769 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5770 'tags': [],
5771 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5772 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5773 'channel_follower_count': int
a6213a49 5774 },
5775 'playlist_mincount': 650,
5776 'params': {
5777 'skip_download': True,
5778 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5779 },
86973308 5780 'skip': 'Query for sorting no longer works',
a6213a49 5781 }, {
5782 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5783 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5784 'info_dict': {
5785 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5786 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5787 'title': 'Uploads from Royalty Free Music - Topic',
5788 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5789 'modified_date': r're:\d{8}',
5790 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5791 'description': '',
5792 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5793 'tags': [],
5794 'channel': 'Royalty Free Music - Topic',
5795 'view_count': int,
5796 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
c26f9b99 5797 'availability': 'public',
a6213a49 5798 },
a6213a49 5799 'playlist_mincount': 101,
5800 'params': {
5801 'skip_download': True,
5802 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5803 },
7c219ea6 5804 }, {
5805 'note': 'non-standard redirect to regional channel',
5806 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5807 'only_matching': True
61d3665d 5808 }, {
5809 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5810 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5811 'info_dict': {
5812 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5813 'modified_date': '20220407',
5814 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5815 'tags': [],
5816 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5817 'uploader': 'pukkandan',
5818 'availability': 'unlisted',
5819 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5820 'channel': 'pukkandan',
5821 'description': 'Test for collaborative playlist',
5822 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5823 'view_count': int,
61d3665d 5824 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5825 },
5826 'playlist_mincount': 2
c26f9b99 5827 }, {
5828 'note': 'translated tab name',
5829 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5830 'info_dict': {
5831 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5832 'tags': [],
5833 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5834 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 5835 'description': 'test description',
c26f9b99 5836 'title': 'cole-dlp-test-acc - 再生リスト',
5837 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5838 'uploader': 'cole-dlp-test-acc',
5839 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5840 'channel': 'cole-dlp-test-acc',
5841 },
5842 'playlist_mincount': 1,
5843 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5844 'expected_warnings': ['Preferring "ja"'],
5845 }, {
5846 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5847 'note': 'preferred lang set with playlist with translated video titles',
5848 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5849 'info_dict': {
5850 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5851 'tags': [],
5852 'view_count': int,
5853 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5854 'uploader': 'cole-dlp-test-acc',
5855 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5856 'channel': 'cole-dlp-test-acc',
5857 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5858 'description': 'test',
5859 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5860 'title': 'dlp test playlist',
5861 'availability': 'public',
5862 },
5863 'playlist_mincount': 1,
5864 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5865 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 5866 }, {
5867 # shorts audio pivot for 2GtVksBMYFM.
5868 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5869 'info_dict': {
5870 'id': 'sfv_audio_pivot',
5871 'title': 'sfv_audio_pivot',
5872 'tags': [],
5873 },
5874 'playlist_mincount': 50,
5875
86973308
M
5876 }, {
5877 # Channel with a real live tab (not to be mistaken with streams tab)
5878 # Do not treat like it should redirect to live stream
5879 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5880 'info_dict': {
5881 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5882 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5883 'tags': [],
5884 },
5885 'playlist_mincount': 20,
5886 }, {
5887 # Tab name is not the same as tab id
5888 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5889 'info_dict': {
5890 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5891 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5892 'tags': [],
5893 },
5894 'playlist_mincount': 8,
5895 }, {
5896 # Home tab id is literally home. Not to get mistaken with featured
5897 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5898 'info_dict': {
5899 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5900 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5901 'tags': [],
5902 },
5903 'playlist_mincount': 8,
5904 }, {
5905 # Should get three playlists for videos, shorts and streams tabs
5906 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5907 'info_dict': {
5908 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 5909 'title': 'Polka Ch. 尾丸ポルカ',
5910 'channel_follower_count': int,
5911 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5912 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5913 'uploader': 'Polka Ch. 尾丸ポルカ',
5914 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5915 'channel': 'Polka Ch. 尾丸ポルカ',
5916 'tags': 'count:35',
5917 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5918 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
86973308
M
5919 },
5920 'playlist_count': 3,
5921 }, {
5922 # Shorts tab with channel with handle
5923 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5924 'info_dict': {
5925 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5926 'title': 'Not Just Bikes - Shorts',
5927 'tags': 'count:12',
5928 'uploader': 'Not Just Bikes',
5929 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5930 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5931 'channel_follower_count': int,
5932 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5933 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5934 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5935 'channel': 'Not Just Bikes',
5936 },
5937 'playlist_mincount': 10,
5938 }, {
5939 # Streams tab
5940 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5941 'info_dict': {
5942 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5943 'title': '中村悠一 - Live',
5944 'tags': 'count:7',
5945 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5946 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5947 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5948 'channel': '中村悠一',
5949 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5950 'channel_follower_count': int,
5951 'uploader': '中村悠一',
5952 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5953 },
5954 'playlist_mincount': 60,
5955 }, {
5956 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5957 # See test_youtube_lists
5958 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5959 'only_matching': True,
5960 }, {
5961 # No uploads and no UCID given. Should fail with no uploads error
5962 # See test_youtube_lists
5963 'url': 'https://www.youtube.com/news',
5964 'only_matching': True
5965 }, {
5966 # No videos tab but has a shorts tab
5967 'url': 'https://www.youtube.com/c/TKFShorts',
5968 'info_dict': {
5969 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5970 'title': 'Shorts Break - Shorts',
5971 'tags': 'count:32',
5972 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5973 'channel': 'Shorts Break',
5974 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5975 'uploader': 'Shorts Break',
5976 'channel_follower_count': int,
5977 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5978 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5979 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5980 },
5981 'playlist_mincount': 30,
5982 }, {
5983 # Trending Now Tab. tab id is empty
5984 'url': 'https://www.youtube.com/feed/trending',
5985 'info_dict': {
5986 'id': 'trending',
5987 'title': 'trending - Now',
5988 'tags': [],
5989 },
5990 'playlist_mincount': 30,
5991 }, {
5992 # Trending Gaming Tab. tab id is empty
5993 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5994 'info_dict': {
5995 'id': 'trending',
5996 'title': 'trending - Gaming',
5997 'tags': [],
5998 },
5999 'playlist_mincount': 30,
4dc23a80
M
6000 }, {
6001 # Shorts url result in shorts tab
6002 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6003 'info_dict': {
6004 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6005 'title': 'cole-dlp-test-acc - Shorts',
6006 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6007 'channel': 'cole-dlp-test-acc',
4dc23a80
M
6008 'description': 'test description',
6009 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6010 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6011 'tags': [],
6012 'uploader': 'cole-dlp-test-acc',
6013 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6014
6015 },
6016 'playlist': [{
6017 'info_dict': {
6018 '_type': 'url',
6019 'ie_key': 'Youtube',
6020 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6021 'id': 'sSM9J5YH_60',
6022 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6023 'title': 'SHORT short',
6024 'channel': 'cole-dlp-test-acc',
6025 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6026 'view_count': int,
6027 'thumbnails': list,
6028 }
6029 }],
6030 'params': {'extract_flat': True},
6031 }, {
6032 # Live video status should be extracted
6033 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6034 'info_dict': {
6035 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6036 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6037 'tags': []
6038 },
6039 'playlist': [{
6040 'info_dict': {
6041 '_type': 'url',
6042 'ie_key': 'Youtube',
6043 'url': 'startswith:https://www.youtube.com/watch?v=',
6044 'id': str,
6045 'title': str,
6046 'live_status': 'is_live',
6047 'channel_id': str,
6048 'channel_url': str,
6049 'concurrent_view_count': int,
6050 'channel': str,
6051 }
6052 }],
c7335551 6053 'params': {'extract_flat': True, 'playlist_items': '1'},
4dc23a80 6054 'playlist_mincount': 1
c7335551
M
6055 }, {
6056 # Channel renderer metadata. Contains number of videos on the channel
6057 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6058 'info_dict': {
6059 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6060 'title': 'cole-dlp-test-acc - Channels',
6061 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6062 'channel': 'cole-dlp-test-acc',
6063 'description': 'test description',
6064 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6065 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6066 'tags': [],
6067 'uploader': 'cole-dlp-test-acc',
6068 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6069
6070 },
6071 'playlist': [{
6072 'info_dict': {
6073 '_type': 'url',
6074 'ie_key': 'YoutubeTab',
6075 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6076 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6077 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6078 'title': 'PewDiePie',
6079 'channel': 'PewDiePie',
6080 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6081 'thumbnails': list,
6082 'channel_follower_count': int,
6083 'playlist_count': int
6084 }
6085 }],
6086 'params': {'extract_flat': True},
a6213a49 6087 }]
6088
@classmethod
def suitable(cls, url):
    """Reject any URL that YoutubeIE accepts; otherwise defer to the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6092
86973308
M
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single '/segment'; it is only attempted when the
#          `not_channel` group (referenced by the conditional) did not match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6094
def _get_url_mobj(self, url):
    """Match url against _URL_RE and return its named groups, using '' for groups that did not match."""
    groups = self._URL_RE.match(url).groupdict()
    return {name: '' if value is None else value for name, value in groups.items()}
6099
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """
    Determine the id and the lower-cased title of a tab renderer.

    The id is looked up, in order, from the tab segment of the tab's endpoint URL,
    from 'tabIdentifier', and finally from the tab title itself.

    @param tab       tab renderer dict
    @param base_url  URL the (possibly relative) endpoint URL is joined onto
    @returns         (tab_id, tab_name)
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # Drop the leading '/' of the matched tab segment (may leave an empty string)
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # No id available, so the tab name has to stand in for it.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6121
def _has_tab(self, tabs, tab_id):
    """Return True if at least one entry of tabs resolves to tab_id via _extract_tab_id_and_name."""
    for candidate in tabs:
        if self._extract_tab_id_and_name(candidate)[0] == tab_id:
            return True
    return False
fe03a6cd 6124
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """
    Resolve a tab, playlist or watch-with-playlist URL.

    Depending on what the page turns out to be, this returns the result for a single tab,
    a playlist of several per-tab results, the result of an inline (watch page) playlist,
    or a url_result that hands over to another URL/extractor.

    @param url            URL to extract; its host is rewritten to www.youtube.com
    @param smuggled_data  dict of smuggled data; only 'is_music_url' is read here
    @raises ExtractorError  when the URL lacks both video and playlist ids, an album cannot
                            be resolved, the channel has no uploads, the requested tab does
                            not exist, or the page cannot be recognized
    @raises UserNotLive     when the 'live' tab was requested but a different tab was selected
    """
    item_id = self._match_id(url)
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    # _get_url_mobj() maps unmatched groups to '', so the values below are always strings
    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading '/', the tab id does not
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel URL is fetched through its /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When the playlist is not wanted, hand the video over to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # extra_tabs collects URLs of additional tabs that are extracted alongside the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the uploads playlist replaces the channel
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # The already downloaded data is dropped; only extra_tabs contribute entries below
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tabs are extracted again because `data` may have been replaced (or cleared) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs were extracted: wrap them in one playlist carrying the channel metadata
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video id from the response, else the one from the URL query
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6274
c5e8d7af 6275
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and any youtube/invidious URL carrying a `list=` parameter,
    # then forwards the work to YoutubeTabIE
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if YoutubeTabIE does not, it has no `v` query value, and the inherited check passes."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-local import is kept on purpose; presumably this method
        # has to work without the module-level imports - confirm before hoisting it
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite url as a www.youtube.com/playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry the original query over; a bare id has none, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6388
6389
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; re-expressed as a watch URL for YoutubeTabIE
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent www.youtube.com/watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6439
6440
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=... URLs; forwarded to the channel's /live URL
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the channel named in the query."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6454
6455
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand; forwarded to the matching /user/ URL
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL for the name after the prefix."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, ie=YoutubeTabIE, video_id=user_id)
9558dcec 6468
b05654f0 6469
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords; forwarded to the "LL" playlist URL
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist?list=LL URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6487
6488
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ":ytnotif" style keywords; pages through the notification menu endpoint
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    IE_NAME = 'youtube:notif'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield an entry for every usable notification in response.

        continuation_list[0] is reset to None and then set to the last
        'continuationItemRenderer' seen among the items, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, continuation_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Turn one notification renderer into a 'url' type entry.

        Notifications with a watch endpoint point at the video; the others are
        treated as community posts. Returns None for a non-video notification
        lacking either the channel id or the post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_re = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_re, notification_title, 'video title', default=None)

        # The relative "sent" time is only parsed when approximate dates were requested
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive notification menu pages until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 1
        while True:
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None before the first request)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return
            page += 1

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are both 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6578
6579
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" keyword searches
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
b05654f0 6593
a61fd4cf 6594
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" keyword searches
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
75dff0ee 6608
c9ae7b95 6609
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # www.youtube.com /results and /search URLs that carry a search_query= or q= parameter
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by the URL's query (with its optional `sp` value) and return it as a playlist."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present and non-empty
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
3462ffa8 6673
6674
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be chosen
    # either through the `sp` query parameter or through the URL fragment
    # (e.g. "#songs"), which is looked up in _SECTIONS.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (lowercase) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results for the query in `url`.

        The playlist ID/title is the search term, suffixed with " - <section>"
        when a section could be determined.
        """
        url_params = parse_qs(url)
        search_term = (url_params.get('search_query') or url_params.get('q'))[0]
        sp = url_params.get('sp', (None,))[0]

        if sp:
            # Explicit params: use the matching section name for the title,
            # or the raw params value when it is not a known section
            section_name = sp
            for name, encoded in self._SECTIONS.items():
                if encoded == sp:
                    section_name = name
                    break
        else:
            # No explicit params: interpret the URL fragment as a section name
            fragment = (url.split('#') + [''])[1]
            section_name = urllib.parse.unquote_plus(fragment).lower()
            sp = self._SECTIONS.get(section_name)
            if not sp:
                # Unknown or missing fragment: plain search without a section
                section_name = None

        playlist_title = join_nonempty(search_term, section_name, delim=' - ')
        entries = self._search_results(search_term, sp, default_client='web_music')
        return self.playlist_result(entries, playlist_title, playlist_title)
16aa9ea4 6726
6727
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to override _FEED_NAME; it is used to build both
    IE_NAME and the feed URL that extraction is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so the
        # login check is invoked as a plain function with this instance.
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL for _FEED_NAME."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6746
6747
class YoutubeWatchLaterIE(InfoExtractor):
    # Maps the ":ytwatchlater" keyword onto the "WL" playlist URL.
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL (list=WL)."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6760
6761
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page as well as the ":ytrec" /
    # ":ytrecommended" keywords.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 6777
1ed5b5c9 6778
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the "subscriptions" feed; the pattern accepts
    # ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 6790
1ed5b5c9 6791
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the "history" feed (":ythis" / ":ythistory").
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
6800
6801
class YoutubeStoriesIE(InfoExtractor):
    # Handles the "ytstories:<channel id>" pseudo-URL. The captured `id` group
    # is the channel ID without its leading "UC".
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist built from the channel ID."""
        # Playlist ID is "RLTD" followed by the matched part of the channel ID
        playlist_id = f'RLTD{self._match_id(url)}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        # Smuggle a flag marking this playlist as a story
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6816
6817
class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Handles youtube.com/source/<video id>/shorts URLs by redirecting to the
    # sfv_audio_pivot feed with browse params generated from the video ID.
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id.

        The encoded video id is inserted three times into a fixed byte
        template; the result is base64-encoded and then percent-quoted.
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6840
6841
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single non-video
    # query parameter (or with no parameter at all). Such URLs carry no video
    # ID, so extraction always fails with a hint about shell quoting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with expected=True.
        """
        # The example commands name this project's executable (yt-dlp) so that
        # the suggested command line can be used as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6889
6890
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Handles youtube.com/clip/<id> URLs: resolves the clip page to its base
    # video and passes the clip's start/end offsets on as section bounds.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the clip's base video.

        Raises:
            ExtractorError: if the page data contains neither a video ID nor
                the clip's loop command (start/end times).
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # The traversal may find nothing; fail with a clear extractor error
        # instead of a TypeError from subscripting a missing result.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; convert to seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6947
6948
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Handles consent.youtube.com/m? redirect pages by following the URL
    # given in their `continue` query parameter.
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Hand the `continue` target of the consent URL to the matching extractor.

        Raises:
            ExtractorError: (expected) if `continue` is missing or not a URL.
        """
        # When the parameter occurs more than once, the last value is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
6992
6993
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is only 1-10 characters long and
    # ends the URL; extraction always fails with an explanatory error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)