]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/reddit] Add subreddit as `channel_id` (#5685)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
a4894d3e 3import collections
109dd3b2 4import copy
fe93e2c4 5import datetime
c26f9b99 6import enum
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
720c3099 10import math
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
46383212 14import sys
f8271158 15import threading
8a784c74 16import time
e0df6211 17import traceback
14f25df2 18import urllib.error
ac668111 19import urllib.parse
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
25836db6 22from .openload import PhantomJSwrapper
14f25df2 23from ..compat import functools
545cc85d 24from ..jsinterp import JSInterpreter
4bb4a188 25from ..utils import (
f8271158 26 NO_DEFAULT,
27 ExtractorError,
4d37720a 28 LazyList,
693f0600 29 UserNotLive,
720c3099 30 bug_reports_message,
82d02080 31 classproperty,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
7a32c70d 35 filter_dict,
2d30521a 36 float_or_none,
11f9be09 37 format_field,
ff91cf74 38 get_first,
dd27fd17 39 int_or_none,
641ad5d8 40 is_html,
34921b43 41 join_nonempty,
48416bc4 42 js_to_json,
94278f72 43 mimetype2ext,
9c0d7f49 44 network_exceptions,
11f9be09 45 orderedSet,
6310acf5 46 parse_codecs,
49bd8c66 47 parse_count,
7c80519c 48 parse_duration,
7ea65411 49 parse_iso8601,
4dfbf869 50 parse_qs,
dca3ff4a 51 qualities,
3995d37d 52 remove_start,
cf7e015f 53 smuggle_url,
dbdaaa23 54 str_or_none,
c93d53f5 55 str_to_int,
f3aa3c3f 56 strftime_or_none,
7c365c21 57 traverse_obj,
556dbe7f 58 try_get,
c5e8d7af
PH
59 unescapeHTML,
60 unified_strdate,
f0d785d3 61 unified_timestamp,
cf7e015f 62 unsmuggle_url,
8bdd16b4 63 update_url_query,
21c340b8 64 url_or_none,
fe93e2c4 65 urljoin,
7c365c21 66 variadic,
c5e8d7af
PH
67)
68
# Per-client InnerTube configuration, keyed by the client name.
# Entries without 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' or 'REQUIRE_JS_PLAYER'
# get defaults filled in by build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
241
242
e7870111
D
243def _split_innertube_client(client_name):
244 variant, *base = client_name.rsplit('.', 1)
245 if base:
246 return variant, base[0], variant
247 base, *variant = client_name.split('_', 1)
248 return client_name, base, variant[0] if variant else None
249
250
def build_innertube_clients():
    """
    Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS in place.

    Each client gets a default API key, host, 'REQUIRE_JS_PLAYER' flag and 'hl',
    plus a 'priority' derived from its base client. For every client without a
    variant, an additional '<client>_embedscreen' entry is added.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot: new '*_embedscreen' entries are inserted during the loop
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3


build_innertube_clients()
280
281
class BadgeType(enum.Enum):
    """Kinds of badge reported in the 'type' field of extracted badge dicts"""
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()
289
290
de7f3446 291class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 292 """Provide base functions for Youtube extractors"""
e00eb564 293
3462ffa8 294 _RESERVED_NAMES = (
08e29b9f 295 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
182bda88 296 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
1dd18a88 297 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
0a5095fe 298 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
3462ffa8 299
3619f78d 300 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
301
52efa4b3 302 # _NETRC_MACHINE = 'youtube'
3619f78d 303
b2e8bc1b
JMF
304 # If True it will raise an error if no login info is provided
305 _LOGIN_REQUIRED = False
306
d9190e44
RH
307 _INVIDIOUS_SITES = (
308 # invidious-redirect websites
309 r'(?:www\.)?redirect\.invidious\.io',
310 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 311 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
312 r'(?:www\.)?invidious\.pussthecat\.org',
313 r'(?:www\.)?invidious\.zee\.li',
314 r'(?:www\.)?invidious\.ethibox\.fr',
315 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
316 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
317 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
318 # youtube-dl invidious instances list
319 r'(?:(?:www|no)\.)?invidiou\.sh',
320 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
321 r'(?:www\.)?invidious\.kabi\.tk',
322 r'(?:www\.)?invidious\.mastodon\.host',
323 r'(?:www\.)?invidious\.zapashcanon\.fr',
324 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
325 r'(?:www\.)?invidious\.tinfoil-hat\.net',
326 r'(?:www\.)?invidious\.himiko\.cloud',
327 r'(?:www\.)?invidious\.reallyancient\.tech',
328 r'(?:www\.)?invidious\.tube',
329 r'(?:www\.)?invidiou\.site',
330 r'(?:www\.)?invidious\.site',
331 r'(?:www\.)?invidious\.xyz',
332 r'(?:www\.)?invidious\.nixnet\.xyz',
333 r'(?:www\.)?invidious\.048596\.xyz',
334 r'(?:www\.)?invidious\.drycat\.fr',
335 r'(?:www\.)?inv\.skyn3t\.in',
336 r'(?:www\.)?tube\.poal\.co',
337 r'(?:www\.)?tube\.connect\.cafe',
338 r'(?:www\.)?vid\.wxzm\.sx',
339 r'(?:www\.)?vid\.mint\.lgbt',
340 r'(?:www\.)?vid\.puffyan\.us',
341 r'(?:www\.)?yewtu\.be',
342 r'(?:www\.)?yt\.elukerio\.org',
343 r'(?:www\.)?yt\.lelux\.fi',
344 r'(?:www\.)?invidious\.ggc-project\.de',
345 r'(?:www\.)?yt\.maisputain\.ovh',
346 r'(?:www\.)?ytprivate\.com',
347 r'(?:www\.)?invidious\.13ad\.de',
348 r'(?:www\.)?invidious\.toot\.koeln',
349 r'(?:www\.)?invidious\.fdn\.fr',
350 r'(?:www\.)?watch\.nettohikari\.com',
351 r'(?:www\.)?invidious\.namazso\.eu',
352 r'(?:www\.)?invidious\.silkky\.cloud',
353 r'(?:www\.)?invidious\.exonip\.de',
354 r'(?:www\.)?invidious\.riverside\.rocks',
355 r'(?:www\.)?invidious\.blamefran\.net',
356 r'(?:www\.)?invidious\.moomoo\.de',
357 r'(?:www\.)?ytb\.trom\.tf',
358 r'(?:www\.)?yt\.cyberhost\.uk',
359 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
360 r'(?:www\.)?qklhadlycap4cnod\.onion',
361 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
362 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
363 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
364 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
365 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
366 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
367 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
368 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
369 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
370 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
371 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
372 r'(?:www\.)?piped\.kavin\.rocks',
d1c4f6d4 373 r'(?:www\.)?piped\.tokhmi\.xyz',
e14ea7fb 374 r'(?:www\.)?piped\.syncpundit\.io',
d1c4f6d4 375 r'(?:www\.)?piped\.mha\.fi',
e14ea7fb
BG
376 r'(?:www\.)?watch\.whatever\.social',
377 r'(?:www\.)?piped\.garudalinux\.org',
378 r'(?:www\.)?piped\.rivo\.lol',
379 r'(?:www\.)?piped-libre\.kavin\.rocks',
380 r'(?:www\.)?yt\.jae\.fi',
d1c4f6d4 381 r'(?:www\.)?piped\.mint\.lgbt',
e14ea7fb
BG
382 r'(?:www\.)?il\.ax',
383 r'(?:www\.)?piped\.esmailelbob\.xyz',
384 r'(?:www\.)?piped\.projectsegfau\.lt',
385 r'(?:www\.)?piped\.privacydev\.net',
386 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
387 r'(?:www\.)?piped\.smnz\.de',
388 r'(?:www\.)?piped\.adminforge\.de',
389 r'(?:www\.)?watch\.whatevertinfoil\.de',
390 r'(?:www\.)?piped\.qdi\.fi',
bc87dac7
B
391 r'(?:www\.)?piped\.video',
392 r'(?:www\.)?piped\.aeong\.one',
d9190e44
RH
393 )
394
c26f9b99 395 # extracted from account/account_menu ep
396 # XXX: These are the supported YouTube UI and API languages,
397 # which is slightly different from languages supported for translation in YouTube studio
398 _SUPPORTED_LANG_CODES = [
399 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
400 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
401 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
402 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
403 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
404 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
405 ]
406
a057779d 407 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
408
c26f9b99 409 @functools.cached_property
410 def _preferred_lang(self):
411 """
412 Returns a language code supported by YouTube for the user preferred language.
413 Returns None if no preferred language set.
414 """
415 preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
416 if not preferred_lang:
417 return
418 if preferred_lang not in self._SUPPORTED_LANG_CODES:
419 raise ExtractorError(
420 f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
421 expected=True)
422 elif preferred_lang != 'en':
423 self.report_warning(
424 f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
425 return preferred_lang
426
cce889b9 427 def _initialize_consent(self):
428 cookies = self._get_cookies('https://www.youtube.com/')
429 if cookies.get('__Secure-3PSID'):
430 return
431 consent_id = None
432 consent = cookies.get('CONSENT')
433 if consent:
434 if 'YES' in consent.value:
435 return
436 consent_id = self._search_regex(
437 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
438 if not consent_id:
439 consent_id = random.randint(100, 999)
440 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 441
f3aa3c3f 442 def _initialize_pref(self):
443 cookies = self._get_cookies('https://www.youtube.com/')
444 pref_cookie = cookies.get('PREF')
445 pref = {}
446 if pref_cookie:
447 try:
14f25df2 448 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 449 except ValueError:
450 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
c26f9b99 451 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
14f25df2 452 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 453
b2e8bc1b 454 def _real_initialize(self):
f3aa3c3f 455 self._initialize_pref()
cce889b9 456 self._initialize_consent()
a25bca9f 457 self._check_login_required()
458
459 def _check_login_required(self):
24146491 460 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 461 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 462
b7c47b74 463 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
464 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 465
000c15a4 466 def _get_default_ytcfg(self, client='web'):
467 return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 468
000c15a4 469 def _get_innertube_host(self, client='web'):
470 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 471
000c15a4 472 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 473 # try_get but with fallback to default ytcfg client values when present
474 _func = lambda y: try_get(y, getter, expected_type)
475 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
476
000c15a4 477 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 478 return self._ytcfg_get_safe(
479 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 480 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 481
000c15a4 482 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 483 return self._ytcfg_get_safe(
484 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 485 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 486
2ae778b8 487 def _select_api_hostname(self, req_api_hostname, default_client=None):
488 return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
489 or req_api_hostname or self._get_innertube_host(default_client or 'web'))
490
000c15a4 491 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 492 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 493
000c15a4 494 def _extract_context(self, ytcfg=None, default_client='web'):
f3aa3c3f 495 context = get_first(
496 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
396a76f7 497 # Enforce language and tz for extraction
498 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
c26f9b99 499 client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
109dd3b2 500 return context
501
cf87314d 502 _SAPISID = None
503
109dd3b2 504 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 505 time_now = round(time.time())
cf87314d 506 if self._SAPISID is None:
507 yt_cookies = self._get_cookies('https://www.youtube.com')
508 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
509 # See: https://github.com/yt-dlp/yt-dlp/issues/393
510 sapisid_cookie = dict_get(
511 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
512 if sapisid_cookie and sapisid_cookie.value:
513 self._SAPISID = sapisid_cookie.value
514 self.write_debug('Extracted SAPISID cookie')
515 # SAPISID cookie is required if not already present
516 if not yt_cookies.get('SAPISID'):
517 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
518 self._set_cookie(
519 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
520 else:
521 self._SAPISID = False
522 if not self._SAPISID:
523 return None
1974e99f 524 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
525 sapisidhash = hashlib.sha1(
86e5f3ed 526 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 527 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
528
529 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 530 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 531 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 532
109dd3b2 533 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 534 data.update(query)
11f9be09 535 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 536 real_headers.update({'content-type': 'application/json'})
537 if headers:
538 real_headers.update(headers)
2ae778b8 539 api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
540 or api_key or self._extract_api_key(default_client=default_client))
545cc85d 541 return self._download_json(
2ae778b8 542 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
a5c56234 543 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 544 data=json.dumps(data).encode('utf8'), headers=real_headers,
2ae778b8 545 query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 546
65141660 547 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
548 return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 549
99e9e001 550 @staticmethod
551 def _extract_session_index(*data):
552 """
553 Index of current account in account list.
554 See: https://github.com/yt-dlp/yt-dlp/pull/519
555 """
556 for ytcfg in data:
557 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
558 if session_index is not None:
559 return session_index
560
561 # Deprecated?
562 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 563 if ytcfg:
14f25df2 564 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
565 if token:
566 return token
99e9e001 567 if webpage:
568 return self._search_regex(
569 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
570 'identity token', default=None, fatal=False)
a1c5d2ca
M
571
572 @staticmethod
fe93e2c4 573 def _extract_account_syncid(*args):
8ea3f7b9 574 """
575 Extract syncId required to download private playlists of secondary channels
fe93e2c4 576 @params response and/or ytcfg
8ea3f7b9 577 """
fe93e2c4 578 for data in args:
579 # ytcfg includes channel_syncid if on secondary channel
14f25df2 580 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
fe93e2c4 581 if delegated_sid:
582 return delegated_sid
583 sync_ids = (try_get(
584 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
14f25df2 585 lambda x: x['DATASYNC_ID']), str) or '').split('||')
fe93e2c4 586 if len(sync_ids) >= 2 and sync_ids[1]:
587 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
588 # and just "user_syncid||" for primary channel. We only want the channel_syncid
589 return sync_ids[0]
a1c5d2ca 590
ac56cf38 591 @staticmethod
592 def _extract_visitor_data(*args):
593 """
594 Extracts visitorData from an API response or ytcfg
595 Appears to be used to track session state
596 """
9222c381 597 return get_first(
6c73052c 598 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
9222c381 599 expected_type=str)
ac56cf38 600
2762dbb1 601 @functools.cached_property
99e9e001 602 def is_authenticated(self):
603 return bool(self._generate_sapisidhash_header())
604
11f9be09 605 def extract_ytcfg(self, video_id, webpage):
8c54a305 606 if not webpage:
607 return {}
29f7c58a 608 return self._parse_json(
609 self._search_regex(
610 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 611 default='{}'), video_id, fatal=False) or {}
612
11f9be09 613 def generate_api_headers(
99e9e001 614 self, *, ytcfg=None, account_syncid=None, session_index=None,
615 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
616
2ae778b8 617 origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
f4f751af 618 headers = {
14f25df2 619 'X-YouTube-Client-Name': str(
11f9be09 620 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
621 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
99e9e001 622 'Origin': origin,
623 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
624 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
50ac0e54 625 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
626 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
99e9e001 627 }
628 if session_index is None:
314ee305 629 session_index = self._extract_session_index(ytcfg)
630 if account_syncid or session_index is not None:
631 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
99e9e001 632
109dd3b2 633 auth = self._generate_sapisidhash_header(origin)
f4f751af 634 if auth is not None:
635 headers['Authorization'] = auth
109dd3b2 636 headers['X-Origin'] = origin
7a32c70d 637 return filter_dict(headers)
29f7c58a 638
a25bca9f 639 def _download_ytcfg(self, client, video_id):
640 url = {
641 'web': 'https://www.youtube.com',
642 'web_music': 'https://music.youtube.com',
643 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
644 }.get(client)
645 if not url:
646 return {}
647 webpage = self._download_webpage(
648 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
649 return self.extract_ytcfg(video_id, webpage) or {}
650
2d6659b9 651 @staticmethod
652 def _build_api_continuation_query(continuation, ctp=None):
653 query = {
654 'continuation': continuation
655 }
656 # TODO: Inconsistency with clickTrackingParams.
657 # Currently we have a fixed ctp contained within context (from ytcfg)
658 # and a ctp in root query for continuation.
659 if ctp:
660 query['clickTracking'] = {'clickTrackingParams': ctp}
661 return query
662
2d6659b9 663 @classmethod
664 def _extract_next_continuation_data(cls, renderer):
665 next_continuation = try_get(
666 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
667 lambda x: x['continuation']['reloadContinuationData']), dict)
668 if not next_continuation:
669 return
670 continuation = next_continuation.get('continuation')
671 if not continuation:
672 return
673 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 674 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 675
676 @classmethod
677 def _extract_continuation_ep_data(cls, continuation_ep: dict):
678 if isinstance(continuation_ep, dict):
679 continuation = try_get(
14f25df2 680 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 681 if not continuation:
682 return
683 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 684 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 685
686 @classmethod
687 def _extract_continuation(cls, renderer):
688 next_continuation = cls._extract_next_continuation_data(renderer)
689 if next_continuation:
690 return next_continuation
fe93e2c4 691
7a32c70d 692 return traverse_obj(renderer, (
693 ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
694 ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
695 ), get_all=False, expected_type=cls._extract_continuation_ep_data)
2d6659b9 696
fe93e2c4 697 @classmethod
698 def _extract_alerts(cls, data):
109dd3b2 699 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
700 if not isinstance(alert_dict, dict):
701 continue
702 for alert in alert_dict.values():
703 alert_type = alert.get('type')
704 if not alert_type:
705 continue
052e1350 706 message = cls._get_text(alert, 'text')
109dd3b2 707 if message:
708 yield alert_type, message
709
c0ac49bc 710 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
a057779d 711 errors, warnings = [], []
109dd3b2 712 for alert_type, alert_message in alerts:
641ad5d8 713 if alert_type.lower() == 'error' and fatal:
109dd3b2 714 errors.append([alert_type, alert_message])
a057779d 715 elif alert_message not in self._IGNORED_WARNINGS:
109dd3b2 716 warnings.append([alert_type, alert_message])
717
718 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 719 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 720 if errors:
721 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
722
723 def _extract_and_report_alerts(self, data, *args, **kwargs):
724 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
725
47193e02 726 def _extract_badges(self, renderer: dict):
c26f9b99 727 privacy_icon_map = {
728 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
729 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
730 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
731 }
732
733 badge_style_map = {
734 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
735 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
736 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
737 }
738
739 label_map = {
740 'unlisted': BadgeType.AVAILABILITY_UNLISTED,
741 'private': BadgeType.AVAILABILITY_PRIVATE,
742 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
743 'live': BadgeType.LIVE_NOW,
744 'premium': BadgeType.AVAILABILITY_PREMIUM
745 }
746
747 badges = []
748 for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
749 badge_type = (
750 privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
751 or badge_style_map.get(traverse_obj(badge, 'style'))
752 )
753 if badge_type:
754 badges.append({'type': badge_type})
755 continue
756
757 # fallback, won't work in some languages
758 label = traverse_obj(badge, 'label', expected_type=str, default='')
759 for match, label_badge_type in label_map.items():
760 if match in label.lower():
761 badges.append({'type': badge_type})
762 continue
763
47193e02 764 return badges
765
c26f9b99 766 @staticmethod
767 def _has_badge(badges, badge_type):
768 return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))
769
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`

    For each candidate object, a string 'simpleText' wins; otherwise the 'text'
    of its 'runs' (or of the candidate itself when it is a list) are concatenated.

    @param path_list: paths tried in order; when none are given (or a path is None),
                      `data` itself is the candidate
    @param max_runs:  when a positive number, only the first `max_runs` runs are joined
    @returns:         the text, or None when nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # Wrap the result so the loop below treats it as one candidate
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(
                traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 791
def _get_count(self, data, *path_list):
    """
    Read a count from the text found at `path_list` in `data`

    The text is first given to `parse_count`; if that yields None, the leading
    run of digits and commas (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
799
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Build a list of thumbnail dicts ('url', 'height', 'width') from `data`
    @param path_list: path(s) to the level that contains the 'thumbnails' key;
                      with no path, `data` itself is that level
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # Keep only the part before the query string
                url = url.partition('?')[0]
            height = int_or_none(entry.get('height'))
            width = int_or_none(entry.get('width'))
            results.append({
                'url': url,
                'height': height,
                'width': width,
            })
    return results
823
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time in a string and convert it with `datetime_from_str`
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns: result of `datetime_from_str`, or None if the text holds no relative
              time or the "<number> <unit> ago" form is rejected with ValueError
    """
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not found:
        return None

    keyword = found.group('start')
    if keyword:
        return datetime_from_str(keyword)

    amount, unit = found.group('time', 'unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
839
c26f9b99 840 def _parse_time_text(self, text):
841 if not text:
842 return
f3aa3c3f 843 dt = self.extract_relative_time(text)
844 timestamp = None
845 if isinstance(dt, datetime.datetime):
846 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 847
848 if timestamp is None:
849 timestamp = (
850 unified_timestamp(text) or unified_timestamp(
851 self._search_regex(
17322130 852 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 853 text.lower(), 'time text', default=None)))
f0d785d3 854
c26f9b99 855 if text and timestamp is None and self._preferred_lang in (None, 'en'):
856 self.report_warning(
857 f'Cannot parse localized time text "{text}"', only_once=True)
858 return timestamp
f3aa3c3f 859
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through `_call_api`, retrying via `self.RetryManager()`

    A retry is requested (by setting `retry.error`) when:
      - the request failed with a network error that is not an HTTPError
      - the request failed with an HTTPError whose code is neither 403 nor 429
      - reporting the alerts of the response raises an error containing "unknown error"
      - `traverse_obj(response, *variadic(check_get_keys))` is falsy (incomplete data)
    Every other failure is handed to `_error_or_warning` and its result is returned.

    @param item_id:        passed to `_call_api` as `video_id` and used when parsing error bodies
    @param check_get_keys: key(s)/path(s) looked up in the response to decide if it is complete
    @param fatal:          only forwarded to `_error_or_warning`; `_call_api` itself is
                           always called with fatal=True so that errors reach this method
    @returns:              the response once it has passed all checks
    """
    # NOTE(review): what happens once the retries are exhausted is decided by
    # RetryManager, which is not visible here - confirm before relying on it
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem, so retrying is pointless
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Only HTTPError causes reach this point. Peek at the start of the
            # error body; if it is not HTML, try to read a JSON error message from it
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 911
@staticmethod
def is_music_url(url):
    """Return True if `url` begins with http(s)://music.youtube.com/"""
    if re.match(r'https?://music\.youtube\.com/', url) is None:
        return False
    return True
915
def _extract_video(self, renderer):
    """
    Build a url-type result (handled by YoutubeIE) from a video renderer dict

    Metadata is read from `renderer`; title, channel and time text fall back to
    the reel player header renderer nested under its navigation endpoint.
    'timestamp' is only filled in when the 'approximate_date' extractor argument
    (looked up with ie_key=YoutubeTabIE) is set.

    @param renderer: renderer dict describing a single video
    @returns:        info dict with '_type': 'url'
    """
    video_id = renderer.get('videoId')

    # Secondary metadata source, used below when `renderer` lacks a field
    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length/overlay text, then the
    # accessibility label of the title
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # Use a /shorts/ URL when either the overlay style or the navigation URL
    # says so; otherwise a regular watch URL
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    # Order matters: a scheduled start time wins over "streamed" in the time
    # text, which in turn wins over the LIVE overlay/badge
    live_status = (
        'is_upcoming' if scheduled_timestamp is not None
        else 'was_live' if 'streamed' in time_text.lower()
        else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
        else None)

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    view_count = (0 if 'no views' in view_count_text.lower()
                  else self._get_count({'simpleText': view_count_text}))
    # For live and upcoming videos the count is stored as 'concurrent_view_count'
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
                    or self._get_text(reel_header_renderer, 'channelTitleText')),
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        # An explicit public badge short-circuits to 'public'; otherwise each
        # flag is True when its badge is present and None when it is absent
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status
    }
998
0c148415 999
360e1ca5 1000class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 1001 IE_DESC = 'YouTube'
cb7dfeea 1002 _VALID_URL = r"""(?x)^
c5e8d7af 1003 (
edb53e2d 1004 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 1005 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1006 (?:www\.)?deturl\.com/www\.youtube\.com|
1007 (?:www\.)?pwnyoutube\.com|
1008 (?:www\.)?hooktube\.com|
1009 (?:www\.)?yourepeat\.com|
1010 tube\.majestyc\.net|
1011 %(invidious)s|
1012 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
1013 (?:.*?\#/)? # handle anchor (#/) redirect urls
1014 (?: # the various things that can precede the ID:
b6ce9bb0 1015 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 1016 |(?: # or the v= param in all its forms
f7000f3a 1017 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 1018 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 1019 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
1020 v=
1021 )
f4b05232 1022 ))
cbaed4bb
S
1023 |(?:
1024 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
1025 vid\.plus| # or vid.plus/xxxx
1026 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 1027 %(invidious)s
cbaed4bb 1028 )/
edb53e2d 1029 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1030 )
c5e8d7af 1031 )? # all until now is optional -> you can pass the naked ID
201c1459 1032 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1033 (?(1).+)? # if we found the ID, everything can follow
9297939e 1034 (?:\#|$)""" % {
d9190e44 1035 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 1036 }
7c6eb424 1037 _EMBED_REGEX = [
1038 r'''(?x)
1039 (?:
0ca0f881 1040 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
7c6eb424 1041 data-video-url=|
1042 <embed[^>]+?src=|
1043 embedSWF\(?:\s*|
1044 <object[^>]+data=|
1045 new\s+SWFObject\(
1046 )
1047 (["\'])
1048 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1049 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1050 \1''',
1051 # https://wordpress.org/plugins/lazy-load-for-videos/
1052 r'''(?xs)
1053 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1054 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1055 ]
6368e2e6 1056 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
7c6eb424 1057
e40c758c 1058 _PLAYER_INFO_RE = (
cc2db878 1059 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1060 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1061 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1062 )
2c62dc26 1063 _formats = {
c2d3cb4c 1064 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1065 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1066 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1067 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1068 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1069 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1070 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1071 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1072 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1073 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1074 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1075 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1076 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1077 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1078 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1079 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1080 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1081 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1082
1083
1084 # 3D videos
c2d3cb4c 1085 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1086 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1087 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1088 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1089 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1090 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1091 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1092
96fb5605 1093 # Apple HTTP Live Streaming
11f12195 1094 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1095 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1096 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1097 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1098 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1099 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1100 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1101 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1102
1103 # DASH mp4 video
d23028a8
S
1104 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1106 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1107 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1109 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1110 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1111 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1112 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1113 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1114 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1115 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1116
f6f1fc92 1117 # Dash mp4 audio
d23028a8
S
1118 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1119 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1120 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1121 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1122 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1123 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1124 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1125
1126 # Dash webm
d23028a8
S
1127 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1131 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1132 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1133 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1134 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1139 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1140 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1142 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1143 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1144 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1145 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1146 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1147 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1148 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1149
1150 # Dash webm audio
d23028a8
S
1151 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1152 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1153
0857baad 1154 # Dash webm audio with opus inside
d23028a8
S
1155 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1156 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1157 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1158
ce6b9a2d
PH
1159 # RTMP (unnamed)
1160 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1161
1162 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1163 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1164 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1165 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1166 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1167 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1168 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1169 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1170 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1171 }
29f7c58a 1172 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1173
fd5c4aab
S
1174 _GEO_BYPASS = False
1175
78caa52a 1176 IE_NAME = 'youtube'
2eb88d95
PH
1177 _TESTS = [
1178 {
2d3d2997 1179 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1180 'info_dict': {
1181 'id': 'BaW_jenozKc',
1182 'ext': 'mp4',
3867038a 1183 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1184 'uploader': 'Philipp Hagemeister',
1185 'uploader_id': 'phihag',
ec85ded8 1186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1187 'channel': 'Philipp Hagemeister',
dd4c4492
S
1188 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1189 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1190 'upload_date': '20121002',
ff9f925b 1191 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1192 'categories': ['Science & Technology'],
3867038a 1193 'tags': ['youtube-dl'],
556dbe7f 1194 'duration': 10,
dbdaaa23 1195 'view_count': int,
3e7c1224 1196 'like_count': int,
ff9f925b 1197 'availability': 'public',
1198 'playable_in_embed': True,
1199 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1200 'live_status': 'not_live',
1201 'age_limit': 0,
7c80519c 1202 'start_time': 1,
297a564b 1203 'end_time': 9,
12a1b225 1204 'comment_count': int,
6c73052c 1205 'channel_follower_count': int
2eb88d95 1206 }
0e853ca4 1207 },
fccd3771 1208 {
4bc3a23e
PH
1209 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1210 'note': 'Embed-only video (#1746)',
1211 'info_dict': {
1212 'id': 'yZIXLfi8CZQ',
1213 'ext': 'mp4',
1214 'upload_date': '20120608',
1215 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1216 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1217 'uploader': 'SET India',
94bfcd23 1218 'uploader_id': 'setindia',
ec85ded8 1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1220 'age_limit': 18,
545cc85d 1221 },
1222 'skip': 'Private video',
fccd3771 1223 },
11b56058 1224 {
8bdd16b4 1225 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1226 'note': 'Use the first video ID in the URL',
1227 'info_dict': {
1228 'id': 'BaW_jenozKc',
1229 'ext': 'mp4',
3867038a 1230 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1231 'uploader': 'Philipp Hagemeister',
1232 'uploader_id': 'phihag',
ec85ded8 1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1234 'channel': 'Philipp Hagemeister',
1235 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1236 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1237 'upload_date': '20121002',
976ae3ea 1238 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1239 'categories': ['Science & Technology'],
3867038a 1240 'tags': ['youtube-dl'],
556dbe7f 1241 'duration': 10,
dbdaaa23 1242 'view_count': int,
11b56058 1243 'like_count': int,
976ae3ea 1244 'availability': 'public',
1245 'playable_in_embed': True,
1246 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1247 'live_status': 'not_live',
1248 'age_limit': 0,
12a1b225 1249 'comment_count': int,
6c73052c 1250 'channel_follower_count': int
34a7de29
S
1251 },
1252 'params': {
1253 'skip_download': True,
1254 },
11b56058 1255 },
dd27fd17 1256 {
2d3d2997 1257 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1258 'note': '256k DASH audio (format 141) via DASH manifest',
1259 'info_dict': {
1260 'id': 'a9LDPn-MO4I',
1261 'ext': 'm4a',
1262 'upload_date': '20121002',
1263 'uploader_id': '8KVIDEO',
ec85ded8 1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1265 'description': '',
1266 'uploader': '8KVIDEO',
1267 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1268 },
4bc3a23e
PH
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141',
4919603f 1272 },
de3c7fe0 1273 'skip': 'format 141 not served anymore',
dd27fd17 1274 },
8bdd16b4 1275 # DASH manifest with encrypted signature
1276 {
1277 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1278 'info_dict': {
1279 'id': 'IB3lcPjvWLA',
1280 'ext': 'm4a',
1281 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1282 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1283 'duration': 244,
1284 'uploader': 'AfrojackVEVO',
1285 'uploader_id': 'AfrojackVEVO',
1286 'upload_date': '20131011',
cc2db878 1287 'abr': 129.495,
976ae3ea 1288 'like_count': int,
1289 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1290 'playable_in_embed': True,
1291 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1292 'view_count': int,
1293 'track': 'The Spark',
1294 'live_status': 'not_live',
1295 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1296 'channel': 'Afrojack',
1297 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1298 'tags': 'count:19',
1299 'availability': 'public',
1300 'categories': ['Music'],
1301 'age_limit': 0,
1302 'alt_title': 'The Spark',
6c73052c 1303 'channel_follower_count': int
8bdd16b4 1304 },
1305 'params': {
1306 'youtube_include_dash_manifest': True,
1307 'format': '141/bestaudio[ext=m4a]',
1308 },
1309 },
65c2fde2 1310 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1311 {
65c2fde2 1312 'note': 'Embed allowed age-gate video',
2d3d2997 1313 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1314 'info_dict': {
1315 'id': 'HtVdAasjOgU',
1316 'ext': 'mp4',
1317 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1318 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1319 'duration': 142,
c522adb1
JMF
1320 'uploader': 'The Witcher',
1321 'uploader_id': 'WitcherGame',
ec85ded8 1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1323 'upload_date': '20140605',
34952f09 1324 'age_limit': 18,
976ae3ea 1325 'categories': ['Gaming'],
1326 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1327 'availability': 'needs_auth',
1328 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1329 'like_count': int,
1330 'channel': 'The Witcher',
1331 'live_status': 'not_live',
1332 'tags': 'count:17',
1333 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1334 'playable_in_embed': True,
1335 'view_count': int,
6c73052c 1336 'channel_follower_count': int
c522adb1
JMF
1337 },
1338 },
65c2fde2 1339 {
1340 'note': 'Age-gate video with embed allowed in public site',
1341 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1342 'info_dict': {
1343 'id': 'HsUATh_Nc2U',
1344 'ext': 'mp4',
1345 'title': 'Godzilla 2 (Official Video)',
1346 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1347 'upload_date': '20200408',
1348 'uploader_id': 'FlyingKitty900',
1349 'uploader': 'FlyingKitty',
1350 'age_limit': 18,
976ae3ea 1351 'availability': 'needs_auth',
1352 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1353 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1354 'channel': 'FlyingKitty',
1355 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1356 'view_count': int,
1357 'categories': ['Entertainment'],
1358 'live_status': 'not_live',
1359 'tags': ['Flyingkitty', 'godzilla 2'],
1360 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1361 'like_count': int,
1362 'duration': 177,
1363 'playable_in_embed': True,
6c73052c 1364 'channel_follower_count': int
65c2fde2 1365 },
1366 },
1367 {
1368 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1369 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1370 'info_dict': {
1371 'id': 'Tq92D6wQ1mg',
1372 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1373 'ext': 'mp4',
17322130 1374 'upload_date': '20191228',
65c2fde2 1375 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1376 'uploader': 'Projekt Melody',
1377 'description': 'md5:17eccca93a786d51bc67646756894066',
1378 'age_limit': 18,
976ae3ea 1379 'like_count': int,
1380 'availability': 'needs_auth',
1381 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1382 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1383 'view_count': int,
1384 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1385 'channel': 'Projekt Melody',
1386 'live_status': 'not_live',
1387 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1388 'playable_in_embed': True,
1389 'categories': ['Entertainment'],
1390 'duration': 106,
1391 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1392 'comment_count': int,
6c73052c 1393 'channel_follower_count': int
65c2fde2 1394 },
1395 },
1396 {
1397 'note': 'Non-Agegated non-embeddable video',
1398 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1399 'info_dict': {
1400 'id': 'MeJVWBSsPAY',
1401 'ext': 'mp4',
1402 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1403 'uploader': 'Herr Lurik',
1404 'uploader_id': 'st3in234',
1405 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1406 'upload_date': '20130730',
976ae3ea 1407 'track': 'Such mich find mich',
1408 'age_limit': 0,
1409 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1410 'like_count': int,
1411 'playable_in_embed': False,
1412 'creator': 'OOMPH!',
1413 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1414 'view_count': int,
1415 'alt_title': 'Such mich find mich',
1416 'duration': 210,
1417 'channel': 'Herr Lurik',
1418 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1419 'categories': ['Music'],
1420 'availability': 'public',
1421 'uploader_url': 'http://www.youtube.com/user/st3in234',
1422 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1423 'live_status': 'not_live',
1424 'artist': 'OOMPH!',
6c73052c 1425 'channel_follower_count': int
65c2fde2 1426 },
1427 },
1428 {
1429 'note': 'Non-bypassable age-gated video',
1430 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1431 'only_matching': True,
1432 },
8bdd16b4 1433 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1434 # YouTube Red ad is not captured for creator
1435 {
1436 'url': '__2ABJjxzNo',
1437 'info_dict': {
1438 'id': '__2ABJjxzNo',
1439 'ext': 'mp4',
1440 'duration': 266,
1441 'upload_date': '20100430',
1442 'uploader_id': 'deadmau5',
1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1444 'creator': 'deadmau5',
1445 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1446 'uploader': 'deadmau5',
1447 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1448 'alt_title': 'Some Chords',
976ae3ea 1449 'availability': 'public',
1450 'tags': 'count:14',
1451 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1452 'view_count': int,
1453 'live_status': 'not_live',
1454 'channel': 'deadmau5',
1455 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1456 'like_count': int,
1457 'track': 'Some Chords',
1458 'artist': 'deadmau5',
1459 'playable_in_embed': True,
1460 'age_limit': 0,
1461 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1462 'categories': ['Music'],
1463 'album': 'Some Chords',
6c73052c 1464 'channel_follower_count': int
8bdd16b4 1465 },
1466 'expected_warnings': [
1467 'DASH manifest missing',
1468 ]
1469 },
067aa17e 1470 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1471 {
1472 'url': 'lqQg6PlCWgI',
1473 'info_dict': {
1474 'id': 'lqQg6PlCWgI',
1475 'ext': 'mp4',
556dbe7f 1476 'duration': 6085,
90227264 1477 'upload_date': '20150827',
cbe2bd91 1478 'uploader_id': 'olympic',
ec85ded8 1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1480 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1481 'uploader': 'Olympics',
cbe2bd91 1482 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1483 'like_count': int,
1484 'release_timestamp': 1343767800,
1485 'playable_in_embed': True,
1486 'categories': ['Sports'],
1487 'release_date': '20120731',
1488 'channel': 'Olympics',
1489 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1490 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1491 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1492 'age_limit': 0,
1493 'availability': 'public',
1494 'live_status': 'was_live',
1495 'view_count': int,
1496 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1497 'channel_follower_count': int
cbe2bd91
PH
1498 },
1499 'params': {
1500 'skip_download': 'requires avconv',
e52a40ab 1501 }
cbe2bd91 1502 },
6271f1ca
PH
1503 # Non-square pixels
1504 {
1505 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1506 'info_dict': {
1507 'id': '_b-2C3KPAM0',
1508 'ext': 'mp4',
1509 'stretched_ratio': 16 / 9.,
556dbe7f 1510 'duration': 85,
6271f1ca
PH
1511 'upload_date': '20110310',
1512 'uploader_id': 'AllenMeow',
ec85ded8 1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1514 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1515 'uploader': '孫ᄋᄅ',
6271f1ca 1516 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1517 'playable_in_embed': True,
1518 'channel': '孫ᄋᄅ',
1519 'age_limit': 0,
1520 'tags': 'count:11',
1521 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1522 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1523 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1524 'view_count': int,
1525 'categories': ['People & Blogs'],
1526 'like_count': int,
1527 'live_status': 'not_live',
1528 'availability': 'unlisted',
12a1b225 1529 'comment_count': int,
6c73052c 1530 'channel_follower_count': int
6271f1ca 1531 },
06b491eb
S
1532 },
1533 # url_encoded_fmt_stream_map is empty string
1534 {
1535 'url': 'qEJwOuvDf7I',
1536 'info_dict': {
1537 'id': 'qEJwOuvDf7I',
f57b7835 1538 'ext': 'webm',
06b491eb
S
1539 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1540 'description': '',
1541 'upload_date': '20150404',
1542 'uploader_id': 'spbelect',
1543 'uploader': 'Наблюдатели Петербурга',
1544 },
1545 'params': {
1546 'skip_download': 'requires avconv',
e323cf3f
S
1547 },
1548 'skip': 'This live event has ended.',
06b491eb 1549 },
067aa17e 1550 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1551 {
1552 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1553 'info_dict': {
1554 'id': 'FIl7x6_3R5Y',
eb6793ba 1555 'ext': 'webm',
da77d856
S
1556 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1557 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1558 'duration': 220,
da77d856
S
1559 'upload_date': '20150625',
1560 'uploader_id': 'dorappi2000',
ec85ded8 1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1562 'uploader': 'dorappi2000',
eb6793ba 1563 'formats': 'mincount:31',
da77d856 1564 },
eb6793ba 1565 'skip': 'not actual anymore',
2ee8f5d8 1566 },
8a1a26ce
YCH
1567 # DASH manifest with segment_list
1568 {
1569 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1570 'md5': '8ce563a1d667b599d21064e982ab9e31',
1571 'info_dict': {
1572 'id': 'CsmdDsKjzN8',
1573 'ext': 'mp4',
17ee98e1 1574 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1575 'uploader': 'Airtek',
1576 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1577 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1578 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1579 },
1580 'params': {
1581 'youtube_include_dash_manifest': True,
1582 'format': '135', # bestvideo
be49068d
S
1583 },
1584 'skip': 'This live event has ended.',
2ee8f5d8 1585 },
cf7e015f 1586 {
6368e2e6 1587 # Multifeed videos (multiple cameras), URL can be of any Camera
1588 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
cf7e015f 1589 'info_dict': {
6368e2e6 1590 'id': 'zaPI8MvL8pg',
1591 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1592 'description': 'md5:563ccbc698b39298481ca3c571169519',
cf7e015f
S
1593 },
1594 'playlist': [{
1595 'info_dict': {
6368e2e6 1596 'id': 'j5yGuxZ8lLU',
cf7e015f 1597 'ext': 'mp4',
6368e2e6 1598 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1599 'uploader': 'WiiLikeToPlay',
1600 'description': 'md5:563ccbc698b39298481ca3c571169519',
1601 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1602 'duration': 10120,
1603 'channel_follower_count': int,
1604 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1605 'availability': 'public',
1606 'playable_in_embed': True,
1607 'upload_date': '20131105',
1608 'uploader_id': 'WiiRikeToPray',
1609 'categories': ['Gaming'],
1610 'live_status': 'was_live',
1611 'tags': 'count:24',
1612 'release_timestamp': 1383701910,
1613 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1614 'comment_count': int,
1615 'age_limit': 0,
1616 'like_count': int,
1617 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1618 'channel': 'WiiLikeToPlay',
1619 'view_count': int,
1620 'release_date': '20131106',
cf7e015f
S
1621 },
1622 }, {
1623 'info_dict': {
6368e2e6 1624 'id': 'zaPI8MvL8pg',
cf7e015f 1625 'ext': 'mp4',
6368e2e6 1626 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1627 'uploader_id': 'WiiRikeToPray',
1628 'availability': 'public',
1629 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1630 'channel': 'WiiLikeToPlay',
1631 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1632 'channel_follower_count': int,
1633 'description': 'md5:563ccbc698b39298481ca3c571169519',
1634 'duration': 10108,
1635 'age_limit': 0,
1636 'like_count': int,
1637 'tags': 'count:24',
1638 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1639 'uploader': 'WiiLikeToPlay',
1640 'release_timestamp': 1383701915,
1641 'comment_count': int,
1642 'upload_date': '20131105',
1643 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1644 'release_date': '20131106',
1645 'playable_in_embed': True,
1646 'live_status': 'was_live',
1647 'categories': ['Gaming'],
1648 'view_count': int,
cf7e015f
S
1649 },
1650 }, {
1651 'info_dict': {
6368e2e6 1652 'id': 'R7r3vfO7Hao',
cf7e015f 1653 'ext': 'mp4',
6368e2e6 1654 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1655 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1656 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1657 'like_count': int,
1658 'availability': 'public',
1659 'playable_in_embed': True,
1660 'upload_date': '20131105',
1661 'description': 'md5:563ccbc698b39298481ca3c571169519',
1662 'uploader_id': 'WiiRikeToPray',
1663 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1664 'channel_follower_count': int,
1665 'tags': 'count:24',
1666 'release_date': '20131106',
1667 'uploader': 'WiiLikeToPlay',
1668 'comment_count': int,
1669 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1670 'channel': 'WiiLikeToPlay',
1671 'categories': ['Gaming'],
1672 'release_timestamp': 1383701914,
1673 'live_status': 'was_live',
1674 'age_limit': 0,
1675 'duration': 10128,
1676 'view_count': int,
cf7e015f
S
1677 },
1678 }],
6368e2e6 1679 'params': {'skip_download': True},
cbaed4bb 1680 },
f9f49d87 1681 {
067aa17e 1682 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1683 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1684 'info_dict': {
1685 'id': 'gVfLd0zydlo',
1686 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1687 },
1688 'playlist_count': 2,
be49068d 1689 'skip': 'Not multifeed anymore',
f9f49d87 1690 },
cbaed4bb 1691 {
2d3d2997 1692 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1693 'only_matching': True,
0e49d9a6 1694 },
6d4fc66b 1695 {
2d3d2997 1696 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1697 'only_matching': True,
1698 },
0e49d9a6 1699 {
067aa17e 1700 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1701 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1702 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1703 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1704 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1705 'info_dict': {
1706 'id': 'lsguqyKfVQg',
1707 'ext': 'mp4',
1708 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1709 'alt_title': 'Dark Walk',
0e49d9a6 1710 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1711 'duration': 133,
0e49d9a6
LL
1712 'upload_date': '20151119',
1713 'uploader_id': 'IronSoulElf',
ec85ded8 1714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1715 'uploader': 'IronSoulElf',
11f9be09 1716 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1717 'track': 'Dark Walk',
1718 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1719 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1720 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1721 'categories': ['Film & Animation'],
1722 'view_count': int,
1723 'live_status': 'not_live',
1724 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1725 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1726 'tags': 'count:13',
1727 'availability': 'public',
1728 'channel': 'IronSoulElf',
1729 'playable_in_embed': True,
1730 'like_count': int,
1731 'age_limit': 0,
6c73052c 1732 'channel_follower_count': int
0e49d9a6
LL
1733 },
1734 'params': {
1735 'skip_download': True,
1736 },
1737 },
61f92af1 1738 {
067aa17e 1739 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1740 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1741 'only_matching': True,
1742 },
313dfc45
LL
1743 {
1744 # Video with yt:stretch=17:0
1745 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1746 'info_dict': {
1747 'id': 'Q39EVAstoRM',
1748 'ext': 'mp4',
1749 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1750 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1751 'upload_date': '20151107',
1752 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1753 'uploader': 'CH GAMER DROID',
1754 },
1755 'params': {
1756 'skip_download': True,
1757 },
be49068d 1758 'skip': 'This video does not exist.',
313dfc45 1759 },
201c1459 1760 {
1761 # Video with incomplete 'yt:stretch=16:'
1762 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1763 'only_matching': True,
1764 },
7caf9830
S
1765 {
1766 # Video licensed under Creative Commons
1767 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1768 'info_dict': {
1769 'id': 'M4gD1WSo5mA',
1770 'ext': 'mp4',
1771 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1772 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1773 'duration': 721,
17322130 1774 'upload_date': '20150128',
7caf9830 1775 'uploader_id': 'BerkmanCenter',
ec85ded8 1776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1777 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1778 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1779 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1780 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1781 'like_count': int,
1782 'age_limit': 0,
1783 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1784 'channel': 'The Berkman Klein Center for Internet & Society',
1785 'availability': 'public',
1786 'view_count': int,
1787 'categories': ['Education'],
1788 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1789 'live_status': 'not_live',
1790 'playable_in_embed': True,
12a1b225 1791 'comment_count': int,
d5d1df8a 1792 'channel_follower_count': int,
1793 'chapters': list,
7caf9830
S
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 },
fd050249
S
1799 {
1800 # Channel-like uploader_url
1801 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1802 'info_dict': {
1803 'id': 'eQcmzGIKrzg',
1804 'ext': 'mp4',
1805 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1806 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1807 'duration': 4060,
17322130 1808 'upload_date': '20151120',
eb6793ba 1809 'uploader': 'Bernie Sanders',
fd050249 1810 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1812 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1813 'playable_in_embed': True,
1814 'tags': 'count:12',
1815 'like_count': int,
1816 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1817 'age_limit': 0,
1818 'availability': 'public',
1819 'categories': ['News & Politics'],
1820 'channel': 'Bernie Sanders',
1821 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1822 'view_count': int,
1823 'live_status': 'not_live',
1824 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1825 'comment_count': int,
d5d1df8a 1826 'channel_follower_count': int,
1827 'chapters': list,
fd050249
S
1828 },
1829 'params': {
1830 'skip_download': True,
1831 },
1832 },
040ac686
S
1833 {
1834 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1835 'only_matching': True,
7f29cf54
S
1836 },
1837 {
067aa17e 1838 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1839 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1840 'only_matching': True,
6496ccb4
S
1841 },
1842 {
1843 # Rental video preview
1844 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1845 'info_dict': {
1846 'id': 'uGpuVWrhIzE',
1847 'ext': 'mp4',
1848 'title': 'Piku - Trailer',
1849 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1850 'upload_date': '20150811',
1851 'uploader': 'FlixMatrix',
1852 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1853 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1854 'license': 'Standard YouTube License',
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
eb6793ba 1859 'skip': 'This video is not available.',
022a5d66 1860 },
12afdc2a
S
1861 {
1862 # YouTube Red video with episode data
1863 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1864 'info_dict': {
1865 'id': 'iqKdEhx-dD4',
1866 'ext': 'mp4',
1867 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1868 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1869 'duration': 2085,
12afdc2a
S
1870 'upload_date': '20170118',
1871 'uploader': 'Vsauce',
1872 'uploader_id': 'Vsauce',
1873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1874 'series': 'Mind Field',
1875 'season_number': 1,
1876 'episode_number': 1,
976ae3ea 1877 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1878 'tags': 'count:12',
1879 'view_count': int,
1880 'availability': 'public',
1881 'age_limit': 0,
1882 'channel': 'Vsauce',
1883 'episode': 'Episode 1',
1884 'categories': ['Entertainment'],
1885 'season': 'Season 1',
1886 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1887 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1888 'like_count': int,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
6c73052c 1891 'channel_follower_count': int
12afdc2a
S
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 'expected_warnings': [
1897 'Skipping DASH manifest',
1898 ],
1899 },
c7121fa7
S
1900 {
1901 # The following content has been identified by the YouTube community
1902 # as inappropriate or offensive to some audiences.
1903 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1904 'info_dict': {
1905 'id': '6SJNVb0GnPI',
1906 'ext': 'mp4',
1907 'title': 'Race Differences in Intelligence',
1908 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1909 'duration': 965,
1910 'upload_date': '20140124',
1911 'uploader': 'New Century Foundation',
1912 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1914 },
1915 'params': {
1916 'skip_download': True,
1917 },
545cc85d 1918 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1919 },
022a5d66
S
1920 {
1921 # itag 212
1922 'url': '1t24XAntNCY',
1923 'only_matching': True,
fd5c4aab
S
1924 },
1925 {
1926 # geo restricted to JP
1927 'url': 'sJL6WA-aGkQ',
1928 'only_matching': True,
1929 },
cd5a74a2
S
1930 {
1931 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1932 'only_matching': True,
1933 },
bc2ca1bb 1934 {
1935 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1936 'only_matching': True,
1937 },
1938 {
1939 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1940 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1941 'only_matching': True,
1942 },
825cd268
RA
1943 {
1944 # DRM protected
1945 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1946 'only_matching': True,
4fe54c12
S
1947 },
1948 {
1949 # Video with unsupported adaptive stream type formats
1950 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1951 'info_dict': {
1952 'id': 'Z4Vy8R84T1U',
1953 'ext': 'mp4',
1954 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1955 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1956 'duration': 433,
1957 'upload_date': '20130923',
1958 'uploader': 'Amelia Putri Harwita',
1959 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1961 'formats': 'maxcount:10',
1962 },
1963 'params': {
1964 'skip_download': True,
1965 'youtube_include_dash_manifest': False,
1966 },
5429d6a9 1967 'skip': 'not actual anymore',
5caabd3c 1968 },
1969 {
822b9d9c 1970 # YouTube Music Auto-generated description
5caabd3c 1971 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1972 'info_dict': {
1973 'id': 'MgNrAu2pzNs',
1974 'ext': 'mp4',
1975 'title': 'Voyeur Girl',
1976 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1977 'upload_date': '20190312',
5429d6a9
S
1978 'uploader': 'Stephen - Topic',
1979 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1980 'artist': 'Stephen',
1981 'track': 'Voyeur Girl',
1982 'album': 'it\'s too much love to know my dear',
1983 'release_date': '20190313',
1984 'release_year': 2019,
976ae3ea 1985 'alt_title': 'Voyeur Girl',
1986 'view_count': int,
1987 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1988 'playable_in_embed': True,
1989 'like_count': int,
1990 'categories': ['Music'],
1991 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1992 'channel': 'Stephen',
1993 'availability': 'public',
1994 'creator': 'Stephen',
1995 'duration': 169,
1996 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1997 'age_limit': 0,
1998 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1999 'tags': 'count:11',
2000 'live_status': 'not_live',
6c73052c 2001 'channel_follower_count': int
5caabd3c 2002 },
2003 'params': {
2004 'skip_download': True,
2005 },
2006 },
66b48727
RA
2007 {
2008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2009 'only_matching': True,
2010 },
011e75e6
S
2011 {
2012 # invalid -> valid video id redirection
2013 'url': 'DJztXj2GPfl',
2014 'info_dict': {
2015 'id': 'DJztXj2GPfk',
2016 'ext': 'mp4',
2017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2018 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2019 'upload_date': '20090125',
2020 'uploader': 'Prochorowka',
2021 'uploader_id': 'Prochorowka',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
2023 'artist': 'Panjabi MC',
2024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2025 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2026 },
2027 'params': {
2028 'skip_download': True,
2029 },
545cc85d 2030 'skip': 'Video unavailable',
ea74e00b
DP
2031 },
2032 {
2033 # empty description results in an empty string
2034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2035 'info_dict': {
2036 'id': 'x41yOUIvK2k',
2037 'ext': 'mp4',
2038 'title': 'IMG 3456',
2039 'description': '',
2040 'upload_date': '20170613',
2041 'uploader_id': 'ElevageOrVert',
2042 'uploader': 'ElevageOrVert',
976ae3ea 2043 'view_count': int,
2044 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2045 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2046 'like_count': int,
2047 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2048 'tags': [],
2049 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2050 'availability': 'public',
2051 'age_limit': 0,
2052 'categories': ['Pets & Animals'],
2053 'duration': 7,
2054 'playable_in_embed': True,
2055 'live_status': 'not_live',
2056 'channel': 'ElevageOrVert',
6c73052c 2057 'channel_follower_count': int
ea74e00b
DP
2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
a0566bbf 2063 {
29f7c58a 2064 # with '};' inside yt initial data (see [1])
2065 # see [2] for an example with '};' inside ytInitialPlayerResponse
2066 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2067 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 2068 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2069 'info_dict': {
2070 'id': 'CHqg6qOn4no',
2071 'ext': 'mp4',
2072 'title': 'Part 77 Sort a list of simple types in c#',
2073 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2074 'upload_date': '20130831',
2075 'uploader_id': 'kudvenkat',
2076 'uploader': 'kudvenkat',
976ae3ea 2077 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2078 'like_count': int,
2079 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2080 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2081 'live_status': 'not_live',
2082 'categories': ['Education'],
2083 'availability': 'public',
2084 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2085 'tags': 'count:12',
2086 'playable_in_embed': True,
2087 'age_limit': 0,
2088 'view_count': int,
2089 'duration': 522,
2090 'channel': 'kudvenkat',
12a1b225 2091 'comment_count': int,
d5d1df8a 2092 'channel_follower_count': int,
2093 'chapters': list,
a0566bbf 2094 },
2095 'params': {
2096 'skip_download': True,
2097 },
2098 },
29f7c58a 2099 {
2100 # another example of '};' in ytInitialData
2101 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2102 'only_matching': True,
2103 },
2104 {
2105 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2106 'only_matching': True,
2107 },
545cc85d 2108 {
cc2db878 2109 # https://github.com/ytdl-org/youtube-dl/pull/28094
2110 'url': 'OtqTfy26tG0',
2111 'info_dict': {
2112 'id': 'OtqTfy26tG0',
2113 'ext': 'mp4',
2114 'title': 'Burn Out',
2115 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2116 'upload_date': '20141120',
2117 'uploader': 'The Cinematic Orchestra - Topic',
2118 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2119 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2120 'artist': 'The Cinematic Orchestra',
2121 'track': 'Burn Out',
2122 'album': 'Every Day',
976ae3ea 2123 'like_count': int,
2124 'live_status': 'not_live',
2125 'alt_title': 'Burn Out',
2126 'duration': 614,
2127 'age_limit': 0,
2128 'view_count': int,
2129 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2130 'creator': 'The Cinematic Orchestra',
2131 'channel': 'The Cinematic Orchestra',
2132 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2133 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2134 'availability': 'public',
2135 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2136 'categories': ['Music'],
2137 'playable_in_embed': True,
6c73052c 2138 'channel_follower_count': int
cc2db878 2139 },
2140 'params': {
2141 'skip_download': True,
2142 },
545cc85d 2143 },
bc2ca1bb 2144 {
2145 # controversial video, only works with bpctr when authenticated with cookies
2146 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2147 'only_matching': True,
2148 },
a1a7907b 2149 {
2150 # controversial video, requires bpctr/contentCheckOk
2151 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2152 'info_dict': {
2153 'id': 'SZJvDhaSDnc',
2154 'ext': 'mp4',
2155 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2156 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2157 'uploader': 'CBS Mornings',
11f9be09 2158 'uploader_id': 'CBSThisMorning',
a1a7907b 2159 'upload_date': '20140716',
976ae3ea 2160 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2161 'duration': 170,
2162 'categories': ['News & Politics'],
2163 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2164 'view_count': int,
2165 'channel': 'CBS Mornings',
2166 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2167 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2168 'age_limit': 18,
2169 'availability': 'needs_auth',
2170 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2171 'like_count': int,
2172 'live_status': 'not_live',
2173 'playable_in_embed': True,
6c73052c 2174 'channel_follower_count': int
a1a7907b 2175 }
2176 },
f7ad7160 2177 {
2178 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2179 'url': 'cBvYw8_A0vQ',
2180 'info_dict': {
2181 'id': 'cBvYw8_A0vQ',
2182 'ext': 'mp4',
2183 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2184 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2185 'upload_date': '20201120',
2186 'uploader': 'Walk around Japan',
2187 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2189 'duration': 1456,
2190 'categories': ['Travel & Events'],
2191 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2192 'view_count': int,
2193 'channel': 'Walk around Japan',
2194 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2195 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2196 'age_limit': 0,
2197 'availability': 'public',
2198 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2199 'live_status': 'not_live',
2200 'playable_in_embed': True,
6c73052c 2201 'channel_follower_count': int
f7ad7160 2202 },
2203 'params': {
2204 'skip_download': True,
2205 },
0fb983f6 2206 }, {
2207 # Has multiple audio streams
2208 'url': 'WaOKSUlf4TM',
2209 'only_matching': True
9297939e 2210 }, {
2211 # Requires Premium: has format 141 when requested using YTM url
2212 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2213 'only_matching': True
2214 }, {
120916da 2215 # multiple subtitles with same lang_code
2216 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2217 'only_matching': True,
109dd3b2 2218 }, {
2219 # Force use android client fallback
2220 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2221 'info_dict': {
2222 'id': 'YOelRv7fMxY',
11f9be09 2223 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2224 'ext': '3gp',
2225 'upload_date': '20210624',
2226 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2227 'uploader': 'colinfurze',
11f9be09 2228 'uploader_id': 'colinfurze',
109dd3b2 2229 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2230 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2231 'duration': 596,
2232 'categories': ['Entertainment'],
2233 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2234 'view_count': int,
2235 'channel': 'colinfurze',
2236 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2237 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2238 'age_limit': 0,
2239 'availability': 'public',
2240 'like_count': int,
2241 'live_status': 'not_live',
2242 'playable_in_embed': True,
d5d1df8a 2243 'channel_follower_count': int,
2244 'chapters': list,
109dd3b2 2245 },
2246 'params': {
2247 'format': '17', # 3gp format available on android
2248 'extractor_args': {'youtube': {'player_client': ['android']}},
2249 },
120916da 2250 },
109dd3b2 2251 {
2252 # Skip download of additional client configs (remix client config in this case)
2253 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2254 'only_matching': True,
2255 'params': {
2256 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2257 },
8fc54b12 2258 }, {
2259 # shorts
2260 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2261 'only_matching': True,
9222c381 2262 }, {
2263 'note': 'Storyboards',
2264 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2265 'info_dict': {
2266 'id': '5KLPxDtMqe8',
2267 'ext': 'mhtml',
2268 'format_id': 'sb0',
2269 'title': 'Your Brain is Plastic',
2270 'uploader_id': 'scishow',
2271 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2272 'upload_date': '20140324',
2273 'uploader': 'SciShow',
976ae3ea 2274 'like_count': int,
2275 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2276 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2277 'view_count': int,
2278 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2279 'playable_in_embed': True,
2280 'tags': 'count:12',
2281 'uploader_url': 'http://www.youtube.com/user/scishow',
2282 'availability': 'public',
2283 'channel': 'SciShow',
2284 'live_status': 'not_live',
2285 'duration': 248,
2286 'categories': ['Education'],
2287 'age_limit': 0,
d5d1df8a 2288 'channel_follower_count': int,
2289 'chapters': list,
9222c381 2290 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2291 }, {
2292 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2293 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2294 'info_dict': {
2295 'id': '2NUZ8W2llS4',
2296 'ext': 'mp4',
2297 'title': 'The NP that test your phone performance 🙂',
2298 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2299 'uploader': 'Leon Nguyen',
2300 'uploader_id': 'VNSXIII',
2301 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2302 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2303 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2304 'duration': 21,
2305 'view_count': int,
2306 'age_limit': 0,
2307 'categories': ['Gaming'],
2308 'tags': 'count:23',
2309 'playable_in_embed': True,
2310 'live_status': 'not_live',
2311 'upload_date': '20220103',
2312 'like_count': int,
2313 'availability': 'public',
2314 'channel': 'Leon Nguyen',
2315 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2316 'comment_count': int,
992f9a73 2317 'channel_follower_count': int
2318 }
1ff88b7a 2319 }, {
2320 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2321 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2322 'info_dict': {
2323 'id': '2NUZ8W2llS4',
2324 'ext': 'mp4',
2325 'title': 'The NP that test your phone performance 🙂',
2326 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2327 'uploader': 'Leon Nguyen',
2328 'uploader_id': 'VNSXIII',
2329 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2330 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2331 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2332 'duration': 21,
2333 'view_count': int,
2334 'age_limit': 0,
2335 'categories': ['Gaming'],
2336 'tags': 'count:23',
2337 'playable_in_embed': True,
2338 'live_status': 'not_live',
2339 'upload_date': '20220102',
2340 'like_count': int,
2341 'availability': 'public',
2342 'channel': 'Leon Nguyen',
2343 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2344 'comment_count': int,
2345 'channel_follower_count': int
2346 },
2347 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2348 }, {
2349 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2350 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2351 'info_dict': {
2352 'id': 'mzZzzBU6lrM',
2353 'ext': 'mp4',
2354 'title': 'I Met GeorgeNotFound In Real Life...',
2355 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2356 'uploader': 'Quackity',
2357 'uploader_id': 'QuackityHQ',
2358 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2359 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2360 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2361 'duration': 955,
2362 'view_count': int,
2363 'age_limit': 0,
2364 'categories': ['Entertainment'],
2365 'tags': 'count:26',
2366 'playable_in_embed': True,
2367 'live_status': 'not_live',
2368 'release_timestamp': 1641172509,
2369 'release_date': '20220103',
2370 'upload_date': '20220103',
2371 'like_count': int,
2372 'availability': 'public',
2373 'channel': 'Quackity',
2374 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2375 'channel_follower_count': int
2376 }
2377 },
2378 { # continuous livestream. Microformat upload date should be preferred.
2379 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2380 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2381 'info_dict': {
2382 'id': 'kgx4WGK0oNU',
2383 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2384 'ext': 'mp4',
2385 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2386 'availability': 'public',
2387 'age_limit': 0,
2388 'release_timestamp': 1637975704,
2389 'upload_date': '20210619',
2390 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2391 'live_status': 'is_live',
2392 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2393 'uploader': '阿鲍Abao',
2394 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2395 'channel': 'Abao in Tokyo',
2396 'channel_follower_count': int,
2397 'release_date': '20211127',
2398 'tags': 'count:39',
2399 'categories': ['People & Blogs'],
2400 'like_count': int,
2401 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2402 'view_count': int,
2403 'playable_in_embed': True,
2404 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
867c66ff 2405 'concurrent_view_count': int,
992f9a73 2406 },
2407 'params': {'skip_download': True}
6e634cbe 2408 }, {
2409 # Story. Requires specific player params to work.
ee27297f 2410 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2411 'info_dict': {
ee27297f 2412 'id': 'vv8qTUWmulI',
6e634cbe 2413 'ext': 'mp4',
ee27297f 2414 'availability': 'unlisted',
2415 'view_count': int,
2416 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2417 'upload_date': '20220526',
2418 'categories': ['Education'],
2419 'title': 'Story',
2420 'channel': 'IT\'S HISTORY',
2421 'description': '',
2422 'uploader_id': 'BlastfromthePast',
2423 'duration': 12,
2424 'uploader': 'IT\'S HISTORY',
6e634cbe 2425 'playable_in_embed': True,
6e634cbe 2426 'age_limit': 0,
6e634cbe 2427 'live_status': 'not_live',
ee27297f 2428 'tags': [],
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2430 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2431 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2432 },
2433 'skip': 'stories get removed after some period of time',
ee27297f 2434 }, {
2435 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2436 'info_dict': {
2437 'id': 'tjjjtzRLHvA',
2438 'ext': 'mp4',
2439 'title': 'ハッシュタグ無し };if window.ytcsi',
2440 'upload_date': '20220323',
2441 'like_count': int,
2442 'availability': 'unlisted',
2443 'channel': 'nao20010128nao',
2444 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2445 'age_limit': 0,
2446 'uploader': 'nao20010128nao',
2447 'uploader_id': 'nao20010128nao',
2448 'categories': ['Music'],
6e634cbe 2449 'view_count': int,
2450 'description': '',
ee27297f 2451 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2452 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2453 'live_status': 'not_live',
2454 'playable_in_embed': True,
2455 'channel_follower_count': int,
2456 'duration': 6,
2457 'tags': [],
2458 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2459 }
c26f9b99 2460 }, {
2461 # Prefer primary title+description language metadata by default
2462 # Do not prefer translated description if primary is empty
2463 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2464 'info_dict': {
2465 'id': 'el3E4MbxRqQ',
2466 'ext': 'mp4',
2467 'title': 'dlp test video 2 - primary sv no desc',
2468 'description': '',
2469 'channel': 'cole-dlp-test-acc',
2470 'tags': [],
2471 'view_count': int,
2472 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2473 'like_count': int,
2474 'playable_in_embed': True,
2475 'availability': 'unlisted',
2476 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2477 'age_limit': 0,
2478 'duration': 5,
2479 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2480 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2481 'live_status': 'not_live',
2482 'upload_date': '20220908',
2483 'categories': ['People & Blogs'],
2484 'uploader': 'cole-dlp-test-acc',
2485 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2486 },
2487 'params': {'skip_download': True}
2488 }, {
2489 # Extractor argument: prefer translated title+description
2490 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2491 'info_dict': {
2492 'id': 'gHKT4uU8Zng',
2493 'ext': 'mp4',
2494 'channel': 'cole-dlp-test-acc',
2495 'tags': [],
2496 'duration': 5,
2497 'live_status': 'not_live',
2498 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2499 'upload_date': '20220728',
2500 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2501 'view_count': int,
2502 'categories': ['People & Blogs'],
2503 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2504 'title': 'dlp test video title translated (fr)',
2505 'availability': 'public',
2506 'uploader': 'cole-dlp-test-acc',
2507 'age_limit': 0,
2508 'description': 'dlp test video description translated (fr)',
2509 'playable_in_embed': True,
2510 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2511 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2512 },
2513 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2514 'expected_warnings': [r'Preferring "fr" translated fields'],
a4166234 2515 }, {
2516 'note': '6 channel audio',
2517 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2518 'only_matching': True,
a4894d3e 2519 }, {
2520 'note': 'Multiple HLS formats with same itag',
2521 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2522 'info_dict': {
2523 'id': 'kX3nB4PpJko',
2524 'ext': 'mp4',
2525 'categories': ['Entertainment'],
2526 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2527 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
2528 'live_status': 'not_live',
2529 'duration': 937,
2530 'channel_follower_count': int,
2531 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2532 'title': 'Last To Take Hand Off Jet, Keeps It!',
2533 'channel': 'MrBeast',
2534 'playable_in_embed': True,
2535 'view_count': int,
2536 'upload_date': '20221112',
2537 'uploader': 'MrBeast',
2538 'uploader_id': 'MrBeast6000',
2539 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2540 'age_limit': 0,
2541 'availability': 'public',
2542 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2543 'like_count': int,
2544 'tags': [],
2545 },
2546 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
9bb85699 2547 }, {
2548 'note': 'Audio formats with Dynamic Range Compression',
2549 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2550 'info_dict': {
2551 'id': 'Tq92D6wQ1mg',
2552 'ext': 'weba',
2553 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2554 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2555 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2556 'channel_follower_count': int,
2557 'description': 'md5:17eccca93a786d51bc67646756894066',
2558 'upload_date': '20191228',
2559 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2560 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2561 'playable_in_embed': True,
2562 'like_count': int,
2563 'categories': ['Entertainment'],
2564 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2565 'age_limit': 18,
2566 'channel': 'Projekt Melody',
2567 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2568 'view_count': int,
2569 'availability': 'needs_auth',
2570 'comment_count': int,
2571 'live_status': 'not_live',
2572 'uploader': 'Projekt Melody',
2573 'duration': 106,
2574 },
2575 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
6e634cbe 2576 }
2eb88d95
PH
2577 ]
2578
f2e8dbcc 2579 _WEBPAGE_TESTS = [
2580 # YouTube <object> embed
2581 {
2582 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2583 'md5': '873c81d308b979f0e23ee7e620b312a3',
2584 'info_dict': {
2585 'id': 'msN87y-iEx0',
2586 'ext': 'mp4',
2587 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2588 'upload_date': '20080526',
2589 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2590 'uploader': 'Christopher Sykes',
2591 'uploader_id': 'ChristopherJSykes',
2592 'age_limit': 0,
2593 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2594 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2595 'playable_in_embed': True,
2596 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2597 'like_count': int,
2598 'comment_count': int,
2599 'channel': 'Christopher Sykes',
2600 'live_status': 'not_live',
2601 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2602 'availability': 'public',
2603 'duration': 195,
2604 'view_count': int,
2605 'categories': ['Science & Technology'],
2606 'channel_follower_count': int,
2607 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2608 },
2609 'params': {
2610 'skip_download': True,
2611 }
2612 },
2613 ]
2614
@classmethod
def suitable(cls, url):
    """
    Decide whether this extractor should handle the URL

    URLs carrying a non-empty "list" query parameter are rejected outright;
    everything else is deferred to the parent class' check.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2623
def __init__(self, *args, **kwargs):
    """Initialise the parent class and create the empty per-instance caches"""
    super().__init__(*args, **kwargs)
    # _player_cache is filled by _cached(); _code_cache by _load_player()
    self._player_cache = {}
    self._code_cache = {}
e0df6211 2628
4d37720a 2629 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
adbc4ec4 2630 lock = threading.Lock()
185bf310 2631 start_time = time.time()
adbc4ec4
THD
2632 formats = [f for f in formats if f.get('is_from_start')]
2633
185bf310 2634 def refetch_manifest(format_id, delay):
2635 nonlocal formats, start_time, is_live
2636 if time.time() <= start_time + delay:
adbc4ec4
THD
2637 return
2638
2639 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2640 video_details = traverse_obj(
2641 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2642 microformats = traverse_obj(
2643 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2644 expected_type=dict, default=[])
4d37720a
L
2645 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2646 is_live = live_status == 'is_live'
185bf310 2647 start_time = time.time()
adbc4ec4 2648
185bf310 2649 def mpd_feed(format_id, delay):
adbc4ec4
THD
2650 """
2651 @returns (manifest_url, manifest_stream_number, is_live) or None
2652 """
2653 with lock:
185bf310 2654 refetch_manifest(format_id, delay)
adbc4ec4
THD
2655
2656 f = next((f for f in formats if f['format_id'] == format_id), None)
2657 if not f:
185bf310 2658 if not is_live:
2659 self.to_screen(f'{video_id}: Video is no longer live')
2660 else:
2661 self.report_warning(
2662 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2663 return None
2664 return f['manifest_url'], f['manifest_stream_number'], is_live
2665
2666 for f in formats:
4d37720a
L
2667 f['is_live'] = is_live
2668 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
2669 live_start_time, mpd_feed, not is_live and f.copy())
2670 if is_live:
2671 f['fragments'] = gen
2672 f['protocol'] = 'http_dash_segments_generator'
2673 else:
2674 f['fragments'] = LazyList(gen({}))
2675 del f['is_from_start']
adbc4ec4 2676
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts ('url', 'fragment_count') for a live DASH format

    @param mpd_feed               callable (format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  when truthy, this format dict is used instead of
                                  downloading the MPD and generation stops after one pass
    @param ctx                    dict; 'start' is read and 'last_error' is popped
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: _extract_sequence_from_mpd reads this as a free variable and
    # may return it before the loop below has assigned it for the first time
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            # A missing stream is treated like a failed manifest download. A bare
            # next() would leak StopIteration into the enclosing generator
            fmt_info = next(
                (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
            if not fmt_info:
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive attempts produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a vehicle to leave the for loop and
                    # restart the while loop; it is caught just below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2787
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Find the player JS URL in the given ytcfg objects

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS'. `webpage` is accepted but not used here.

    @returns the URL joined onto https://www.youtube.com, or None if not found
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    player_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', player_url) if player_url else None
109dd3b2 2795
def _download_player_url(self, video_id, fatal=False):
    """
    Build the player JS URL from the player version found in the iframe API script

    @returns the base.js URL, or None when the download or the version search yields nothing
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2805
60064c53
PH
2806 def _signature_cache_id(self, example_sig):
2807 """ Return a string representation of a signature """
14f25df2 2808 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2809
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first _PLAYER_INFO_RE pattern found in player_url

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 2819
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for player_url, downloading it at most once per player id

    Only non-empty downloads are stored in self._code_cache.

    @returns the code, or None when nothing is cached and the download gave nothing
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        return None
    self._code_cache[player_id] = code
    return code
109dd3b2 2830
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that rearranges a signature string according to a list of indices

    The index list is loaded from the 'youtube-sigfuncs' cache when available.
    Otherwise it is derived by running the player's signature function on a probe
    string whose characters encode their own positions, and then stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
83799698 2850
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to `func` when 'youtube_print_sig_code' is set

    `func` is run on a probe string whose characters encode their own positions;
    the resulting index list is rendered as a sum of single-index and slice
    expressions on `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def render_slice(start, end, step):
            starts = str(start) if start != 0 else ''
            if end + step >= 0:
                ends = ':%d' % (end + step)
            else:
                ends = ':'
            steps = (':%d' % step) if step != 1 else ''
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is not None:
                # Inside a run: emit it as soon as the stride changes
                if delta != step:
                    yield render_slice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield render_slice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in func(test_string)]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = ', '.join(str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2892
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it as a Python callable

    @returns a function taking the signature string and returning the result of
             the interpreted JS function called with that single argument
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
2916
580ce007 2917 def _cached(self, func, *cache_id):
2918 def inner(*args, **kwargs):
2919 if cache_id not in self._player_cache:
2920 try:
2921 self._player_cache[cache_id] = func(*args, **kwargs)
2922 except ExtractorError as e:
2923 self._player_cache[cache_id] = e
2924 except Exception as e:
2925 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2926
2927 ret = self._player_cache[cache_id]
2928 if isinstance(ret, Exception):
2929 raise ret
2930 return ret
2931 return inner
2932
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature shape
    cache_key = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
404f611f 2940
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The nsig function is run with the native JS interpreter first. If that raises
    JSInterpreter.Exception, the same code is executed through PhantomJS; when the
    PhantomJS wrapper cannot be created the interpreter error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        cached_extractor = self._cached(
            self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = cached_extractor(jsi, func_code)
        decrypted = nsig_func(s)
    except JSInterpreter.Exception as err:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise err
        warning = (
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}')
        self.report_warning(warning, video_id)
        self.write_debug(err, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2974
def _extract_n_function_name(self, jscode):
    """
    Return the name of the n function referenced in the player JS

    When the reference is indexed (`name[idx]`), `name` is looked up as an array
    literal in the JS and the entry at `idx` is returned instead.
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(idx)]
2985
def _extract_n_function_code(self, video_id, player_url):
    """
    Obtain the code of the player's n function, using the 'youtube-nsig' cache

    On a cache miss the player JS is loaded, the function is located by regex
    (falling back to the JS interpreter) and the result is stored in the cache.

    @returns (jsi, player_id, func_code) where func_code is an (args, code) pair
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because it may contain "$", which would otherwise be
    # read as an end-of-string anchor and make the pattern unmatchable
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3011
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a Python callable from the n function code

    The returned function raises JSInterpreter.Exception for any failure inside
    the interpreted function and for results starting with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)
        else:
            if result.startswith('enhanced_except_'):
                raise JSInterpreter.Exception('Signature function returned an exception')
            return result

    return extract_nsig
e0df6211 3028
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the player JS is searched.
    Without a player_url this raises ExtractorError when fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
3052
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URLs so the video gets marked as watched.

    The playback URL is requested first and the watchtime URL second. If
    either URL is absent from the player responses, a warning is issued and
    nothing further is requested.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = 0
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 3093
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by a third-party webpage.

    An Invidious-style alternate link ends the search immediately; otherwise
    the parent class results are yielded, followed by lazyYT embeds and
    "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link is not None:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for importer_embed in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the unnamed capture holding the video id
        embedded_id = importer_embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
66c9fa36 3117
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for url, raising ExtractorError if none is found."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 3124
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        # The renderer stores the start in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
3139
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro markers lists of the engagement panels.

    Returns the chapters of the first list that yields any, else an empty list.
    """
    markers_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in markers_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 3152
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from timestamped lines of the video description.

    Lines of the form "<timestamp> <title>" are tried first; if they give no
    chapters, lines of the form "<title> <timestamp>" are tried instead.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda pair: parse_duration(pair[1]),
        chapter_title=lambda pair: pair[0],
        duration=duration, strict=False)
84213ea8 3163
1890fc63 3164 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3165 if not duration:
3166 return
3167 chapter_list = [{
3168 'start_time': chapter_time(chapter),
3169 'title': chapter_title(chapter),
3170 } for chapter in chapter_list or []]
3171 if not strict:
3172 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3173
a3976e07 3174 chapters = [{'start_time': 0}]
1890fc63 3175 for idx, chapter in enumerate(chapter_list):
a3976e07 3176 if chapter['start_time'] is None:
1890fc63 3177 self.report_warning(f'Incomplete chapter {idx}')
3178 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 3179 chapters.append(chapter)
709ee214 3180 elif chapter not in chapters:
3181 self.report_warning(
3182 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
a3976e07 3183 return chapters[1:]
84213ea8 3184
a1c5d2ca
M
3185 def _extract_comment(self, comment_renderer, parent=None):
3186 comment_id = comment_renderer.get('commentId')
3187 if not comment_id:
3188 return
fe93e2c4 3189
052e1350 3190 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 3191
c26f9b99 3192 # Timestamp is an estimate calculated from the current time and time_text
3193 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3194 timestamp = self._parse_time_text(time_text)
3195
052e1350 3196 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 3197 author_id = try_get(comment_renderer,
14f25df2 3198 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 3199
49bd8c66 3200 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 3201 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 3202 author_thumbnail = try_get(comment_renderer,
14f25df2 3203 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
3204
3205 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 3206 is_favorited = 'creatorHeart' in (try_get(
3207 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
3208 return {
3209 'id': comment_id,
3210 'text': text,
d92f5d5a 3211 'timestamp': timestamp,
a1c5d2ca
M
3212 'time_text': time_text,
3213 'like_count': votes,
97524332 3214 'is_favorited': is_favorited,
a1c5d2ca
M
3215 'author': author,
3216 'author_id': author_id,
3217 'author_thumbnail': author_thumbnail,
3218 'author_is_uploader': author_is_uploader,
3219 'parent': parent or 'root'
3220 }
3221
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    @param root_continuation_data  renderer from which the first continuation is taken
    @param ytcfg                   config used to build the API request headers
    @param video_id                id of the video; also used to build a
                                   continuation when none is present
    @param parent                  id of the parent comment when fetching a reply
                                   thread, None for the top-level comment section
    @param tracker                 dict of counters shared across recursive calls;
                                   created on the first call

    Replies are fetched by calling this method recursively with parent set.
    Raises self.CommentsDisabled when the root data carries a message and no
    top-level comment was extracted.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation for the requested sort order, if any
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop entirely
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric limits fall back to sys.maxsize, i.e. no limit
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # Stop this reply thread here. `response` still holds the previous
                # page, so carrying on would yield its comments again and re-request
                # the same failing continuation.
                return
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response only provides the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
6e634cbe 3378
3379 @staticmethod
3380 def _generate_comment_continuation(video_id):
3381 """
3382 Generates initial comment section continuation token from given video id
3383 """
3384 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3385 return base64.b64encode(token.encode()).decode()
3386
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""

    def comment_stream():
        # Locate the item section that holds the comments, if there is one
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    # First value of the max_comments extractor argument; None means no cap
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(comment_stream(), limit)
a1c5d2ca 3397
109dd3b2 3398 @staticmethod
99e9e001 3399 def _get_checkok_params():
3400 return {'contentCheckOk': True, 'racyCheckOk': True}
3401
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player query.

    signatureTimestamp is included only when sts is not None.
    """
    content_playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        content_playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {'contentPlaybackContext': content_playback_context},
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3415
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age gate.

    True when playabilityStatus has a desktopLegacyAgeGateReason, or when its
    status or reason contains one of the known age-gate phrases.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Compare case-insensitively: the needles are lower-case, while status values
    # are upper-case (cf. the 'UNPLAYABLE' check in _is_unplayable).
    # Non-string values are ignored rather than raising a TypeError.
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
3427
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the player response has the playability status UNPLAYABLE."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3431
# Sent as the `params` field of the player API query by _extract_player_response
# for story videos and for the android client
_STORY_PLAYER_PARAMS = '8AEB'
3433
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API for video_id as the given client.

    Returns the API response, or None when the response is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    # The signature timestamp can only be looked up when a player is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
3455
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the player_client extractor argument into an ordered client list.

    Unknown names are skipped with a warning; 'default' and 'all' expand to
    the default and to every allowed client respectively. For music URLs the
    existing "<client>_music" variants are appended.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested; the rest are ordered by
    # descending priority
    allowed_clients = sorted(
        (name for name in INNERTUBE_CLIENTS if not name.startswith('_')),
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
            continue
        if client == 'default':
            requested_clients.extend(default)
            continue
        if client == 'all':
            requested_clients.extend(allowed_clients)
            continue
        self.report_warning(f'Skipping unsupported client {client}')

    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 3479
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for video_id from each of the given clients.

    Returns a tuple (prs, player_url): the list of accepted player responses
    and the last player URL in use (None if there is none).
    If requests failed and no response was collected, the last error is
    raised; otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() below takes the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The webpage's own player response is reused for the web client
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the latest error is kept; earlier ones are demoted to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3561
4d37720a
L
3562 def _needs_live_processing(self, live_status, duration):
3563 if (live_status == 'is_live' and self.get_param('live_from_start')
3564 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3565 return live_status
3566
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the format dicts of a video, followed by its manifest subtitles.

    @param streaming_data  list of streamingData dicts from the player responses
    @param video_id        id of the video (used for messages and downloads)
    @param player_url      URL of the JS player needed for signature/nsig
                           decryption; may be None
    @param live_status     live status of the video, e.g. 'is_live'
    @param duration        duration of the video, if known

    Formats listed directly in the streaming data are yielded first, then the
    formats from the HLS and DASH manifests unless those are skipped. The
    last item yielded is the dict of subtitles collected from the manifests.
    """
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # Used to skip streams that were already seen in another response
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and deprioritized below
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format carries neither a quality,
                # an audioQuality nor a qualityLabel; don't crash on that
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                'DRC' if fmt.get('isDrc') else None,
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        # Returns False for a format already seen with this protocol and language
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # Disambiguate the id when the same itag exists under another protocol
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the nearest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    yield subtitles
11f9be09 3773
def _extract_storyboard(self, player_responses, duration):
    """Yield one "storyboard" (mhtml) format dict per level of the storyboard spec.

    The spec string is read from the first player response that carries
    `storyboards.playerStoryboardSpecRenderer.spec`. It is `|`-separated:
    the first field is the URL template, each remaining field describes one
    storyboard level as 8 `#`-separated values.

    @param player_responses  Player response dicts to search for the spec
    @param duration          Video duration, used to derive fps and fragment durations
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    url_template, *levels = raw_spec.split('|')
    base_url = url_or_none(urljoin('https://i.ytimg.com/', url_template or None))
    if not base_url:
        return

    # Levels are walked in reverse spec order; `$L` is substituted with the original index
    levels.reverse()
    last_level = len(levels) - 1
    for idx, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {idx}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = fields[6:]

        url = base_url.replace('$L', str(last_level - idx)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one sheet holding up to cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': url.replace('$M', str(j)),
            # The last sheet may cover less time than a full one
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]

        yield {
            'format_id': f'sb{idx}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
3811
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped), the ytcfg and the player responses.

    The webpage download is skipped when the `player_skip` extractor argument
    contains `webpage`; it is non-fatal otherwise, so `webpage` may be None/False.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the built-in default config when none can be read from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3828
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and extract the formats/subtitles of a video.

    @returns  (live_broadcast_details, live_status, streaming_data, formats, subtitles)
              where `subtitles` is the last item yielded by
              `_extract_formats_and_subtitles` and `formats` is everything before it
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        # videoDetails does not say; fall back to the microformat data
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
adbc4ec4
THD
3847
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Returns early with a `url_result` when the playability status points to a
    trailer video, or with a playlist result for multifeed videos (unless
    `noplaylist` is set or the URL was smuggled with `force_singlefeed`).
    Otherwise the metadata is collected from the player responses, the watch
    page (when downloaded) and the "initial data", and returned as one dict.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` can be None even though a subreason exists; do not concatenate onto None
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; unknown names rank last, webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for throttled/potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            # Check the URL before parsing it
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times given in the URL fragment take precedence over those in the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: In verbose mode unescaped whitespace is ignored, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, 'toggleButtonRenderer',
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
                    default=[]))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows is taken to mean more than one song is listed
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        live_status in ('not_live', None)
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 4358
a61fd4cf 4359
a6213a49 4360class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that carries smuggled URL data through to the returned result(s).

    The wrapped `func(self, url, smuggled_data)` receives the unsmuggled URL
    and its smuggled data. If any smuggled data is present afterwards, it is
    re-smuggled into the `url` of the result and, lazily, of each of its entries,
    but only for results of type `url`/`url_transparent`.
    """
    def _smuggle(info, smuggled_data):
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                    # The music host itself carries the flag, so drop it from the smuggled data
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy since _smuggle may mutate the data
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper
4387
def _extract_channel_id(self, webpage):
    """Return the channel ID found in the webpage's meta tags.

    The `channelId` meta tag is preferred. Otherwise the ID is taken from the
    `/channel/<id>` part of the first available URL meta tag.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group; a repeated group only keeps its last character
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4400
8bdd16b4 4401 @staticmethod
cd7c66cf 4402 def _extract_basic_item_renderer(item):
4403 # Modified from _extract_grid_item_renderer
201c1459 4404 known_basic_renderers = (
a17526e4 4405 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4406 )
4407 for key, renderer in item.items():
201c1459 4408 if not isinstance(renderer, dict):
cd7c66cf 4409 continue
201c1459 4410 elif key in known_basic_renderers:
4411 return renderer
4412 elif key.startswith('grid') and key.endswith('Renderer'):
4413 return renderer
8bdd16b4 4414
c7335551
M
def _extract_channel_renderer(self, renderer):
    """Build a `url` result pointing to the channel described by `renderer`.

    `renderer['channelId']` is required (KeyError otherwise); the remaining
    fields are read with the text/count/thumbnail helpers.
    """
    cid = renderer['channelId']
    name = self._get_text(renderer, 'title')
    url = f'https://www.youtube.com/channel/{cid}'
    result = {
        '_type': 'url',
        'url': url,
        'id': cid,
        'ie_key': YoutubeTabIE.ie_key(),
        # The channel name doubles as the title of the result
        'channel': name,
        'channel_id': cid,
        'channel_url': url,
        'title': name,
    }
    result['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    result['description'] = self._get_text(renderer, 'descriptionSnippet')
    return result
4433
def _grid_entries(self, grid_renderer):
    """Yield a result for every usable item in `grid_renderer['items']`.

    Each item is resolved to its basic renderer and dispatched, in order of
    precedence, as a playlist, a video, a channel, or a generic navigation
    endpoint URL handled by the first suitable extractor.
    """
    for entry in grid_renderer['items']:
        if not isinstance(entry, dict):
            continue
        renderer = self._extract_basic_item_renderer(entry)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 4471
def _music_reponsive_list_entry(self, renderer):
    """Return a url result for a music list item, or None.

    Checked in order: a playlist-item video ID, a watch endpoint carrying
    a playlist ID (with or without a video ID), then a browse endpoint.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4489
3d3dddc9 4490 def _shelf_entries_from_content(self, shelf_renderer):
4491 content = shelf_renderer.get('content')
4492 if not isinstance(content, dict):
8bdd16b4 4493 return
cd7c66cf 4494 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4495 if renderer:
4496 # TODO: add support for nested playlists so each shelf is processed
4497 # as separate playlist
4498 # TODO: this includes only first N items
86e5f3ed 4499 yield from self._grid_entries(renderer)
3d3dddc9 4500 renderer = content.get('horizontalListRenderer')
4501 if renderer:
4502 # TODO
4503 pass
8bdd16b4 4504
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's own URL, then its embedded entries.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    produces nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4520
8bdd16b4 4521 def _playlist_entries(self, video_list_renderer):
4522 for content in video_list_renderer['contents']:
4523 if not isinstance(content, dict):
4524 continue
4525 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4526 if not isinstance(renderer, dict):
4527 continue
4528 video_id = renderer.get('videoId')
4529 if not video_id:
4530 continue
4531 yield self._extract_video(renderer)
07aeced6 4532
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held in the renderer's ``content``, if any.

    The first of ``videoRenderer`` / ``reelItemRenderer`` that is found is
    used, and only when it has a ``videoId``.
    """
    video = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
4540
8bdd16b4 4541 def _video_entry(self, video_renderer):
4542 video_id = video_renderer.get('videoId')
4543 if video_id:
4544 return self._extract_video(video_renderer)
dacb3a86 4545
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the tile's tap-command URL, or None if it has none."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4552
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a backstage post.

    Produces, in order: the attached video, the attached playlist, and
    every YouTube video linked from the post text other than the
    attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # A link to the attached video would be a duplicate
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 4588
8bdd16b4 4589 def _post_thread_continuation_entries(self, post_thread_continuation):
4590 contents = post_thread_continuation.get('contents')
4591 if not isinstance(contents, list):
4592 return
4593 for content in contents:
4594 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4595 if isinstance(renderer, dict):
4596 yield from self._post_thread_entries(renderer)
8bdd16b4 4597 continue
6b0b0a28 4598 renderer = content.get('videoRenderer')
4599 if isinstance(renderer, dict):
4600 yield self._video_entry(renderer)
07aeced6 4601
39ed931e 4602 r''' # unused
4603 def _rich_grid_entries(self, contents):
4604 for content in contents:
4605 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4606 if video_renderer:
4607 entry = self._video_entry(video_renderer)
4608 if entry:
4609 yield entry
4610 '''
52efa4b3 4611
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url result for each URL linked from the report-history table rows."""
    url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4618
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    The continuation for the next page is not returned; it is stored in
    ``continuation_list[0]`` (reset to None on entry), so the caller must
    read it only after consuming this generator.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # The section-like renderer whose own 'contents' hold the items
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Content kinds that hold their entries directly
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            # Only the first known renderer key of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

            # Fall back to the enclosing section's continuation
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: the continuation of the parent renderer itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
4671
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of *tab*, following continuations page by page.

    The first page comes from ``tab['content']``; further pages are fetched
    with ``_extract_response`` until no continuation is left, a response is
    empty, or a response contains no known renderer.
    """
    continuation_list = [None]
    # The lambda looks continuation_list up at call time, so it always
    # writes into whichever list the name is bound to at that moment
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # key of the first continuation item -> (entry generator, key to wrap
        # the items under, or None to pass the continuation contents as-is)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The keys of this item decide which handler processes the page
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list so a continuation from an earlier page cannot be reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer on this page: stop instead of looping forever
        if not video_items_renderer:
            break
9558dcec 4734
8bdd16b4 4735 @staticmethod
7c219ea6 4736 def _extract_selected_tab(tabs, fatal=True):
86973308
M
4737 for tab_renderer in tabs:
4738 if tab_renderer.get('selected'):
4739 return tab_renderer
4740 if fatal:
4741 raise ExtractorError('Unable to find selected tab')
4742
@staticmethod
def _extract_tab_renderers(response):
    """Collect the ``tabRenderer`` / ``expandableTabRenderer`` dicts of a browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
b82f815f 4747
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of a tabbed page.

    The playlist title is the page title followed by the selected tab's
    ``title`` and ``expandedText`` when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for suffix_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, suffix_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
39ed931e 4761
def _extract_metadata_from_tabs(self, item_id, data):
    """Build the playlist-level info dict for a channel or playlist page.

    Starts from ``{'id': item_id}`` and fills in uploader/channel fields,
    title, description, tags, thumbnails, availability and the playlist
    statistics found in *data*. Returns the info dict.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        # Prefer the channel's external ID over the requested item ID
        if info['uploader_id']:
            info['id'] = info['uploader_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text, then to the ID
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel metadata: take the uploader from the playlist owner instead
    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # "by X and N others" is reduced to "X"; any other text is kept as is
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel_* fields mirror the uploader_* fields
    info.update({
        'channel': info['uploader'],
        'channel_id': info['uploader_id'],
        'channel_url': info['uploader_url']
    })
    return info
73c4ac2c 4864
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    Each further page is requested from the ``next`` endpoint, starting at
    the last video seen. Iteration stops when a page has no videos, adds
    nothing after the last yielded video, or the response carries no
    playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist there is nothing left to iterate
        if not playlist:
            return
cd7c66cf 4895
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    If the playlist links to a different page and is not a known unviewable
    one, extraction is delegated to that URL; otherwise the inline playlist
    is extracted directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not can_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4918
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a public badge, header privacy or privacy icon
             says so; otherwise whatever self._availability() derives
             from the private/unlisted/subscription/premium flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    # NOTE(review): a conditional expression binds more loosely than `or`,
    # so `badge or header == X if header is not None else ...` parses as
    # `(badge or header == X) if header is not None else ...`. The badge
    # checks for private/unlisted are therefore only consulted when
    # player_header_privacy is not None - confirm this is intended.
    return (
        'public' if (
            self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC')
        else self._availability(
            is_private=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
            is_unlisted=(
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
                else microformats_is_unlisted if microformats_is_unlisted is not None else None),
            # `or None` turns a missing badge into "unknown" rather than False
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_auth=False))
47193e02 4959
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* among the playlist sidebar items, or None."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((found for found in candidates if found), None)
4968
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request a playlist so that unavailable videos (e.g. private videos,
    region blocked, etc.) are included. Returns None when *data* does not
    describe a playlist.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}'
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
358de58c 4988
@functools.cached_property
def skip_webpage(self):
    """Whether ``webpage`` is listed in the YoutubeTabIE ``skip`` extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4992
def _extract_webpage(self, url, item_id, fatal=True):
    """Download *url* and extract its initial data, retrying where sensible.

    Returns ``(webpage, data)``; either may be None when the download or
    extraction did not succeed and errors were not fatal.
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are retried, except HTTP 403 and 429
                if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
5020
a25bca9f 5021 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5022 """Use if failed to extract ytcfg (and data) from initial webpage"""
5023 if not ytcfg and self.is_authenticated:
5024 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5025 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5026 raise ExtractorError(
5027 f'{msg}. If you are not downloading private content, or '
5028 'your cookies are only for the first account and channel,'
5029 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5030 expected=True)
5031 self.report_warning(msg, only_once=True)
5032
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for *url*.

    The webpage is tried first unless skip_webpage is set; when it yields
    no data, the data is fetched through the API via _extract_tab_endpoint.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        if (url != 'https://www.youtube.com/feed/recommended'
                and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
                and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            # NOTE(review): in the non-fatal case only a warning is issued;
            # data is kept as-is by this block
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5051
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve *url* through the API and fetch the response of its endpoint.

    A ``browseEndpoint`` is queried on ``browse`` and a ``watchEndpoint``
    on ``next``. If neither resolves, raise ExtractorError when *fatal*,
    otherwise warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
5069
# Default ``params`` value sent by _search_results when the caller passes
# none; a falsy value means no ``params`` key is added to the query
_SEARCH_PARAMS = None
5071
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for *query*, page by page.

    @param params: value for the request's ``params`` field; defaults to
                   _SEARCH_PARAMS and is omitted from the request when falsy
    """
    data = {'query': query}
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    if params:
        data['params'] = params

    # Alternative locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # De-duplicated top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation stored by the previous page into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
5104
5105
5106class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5107 IE_DESC = 'YouTube Tabs'
5108 _VALID_URL = r'''(?x:
5109 https?://
5110 (?:\w+\.)?
5111 (?:
5112 youtube(?:kids)?\.com|
5113 %(invidious)s
5114 )/
5115 (?:
5116 (?P<channel_type>channel|c|user|browse)/|
5117 (?P<not_channel>
5118 feed/|hashtag/|
5119 (?:playlist|watch)\?.*?\blist=
5120 )|
5121 (?!(?:%(reserved_names)s)\b) # Direct URLs
5122 )
5123 (?P<id>[^/?\#&]+)
5124 )''' % {
5125 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5126 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5127 }
5128 IE_NAME = 'youtube:tab'
5129
5130 _TESTS = [{
5131 'note': 'playlists, multipage',
5132 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5133 'playlist_mincount': 94,
5134 'info_dict': {
5135 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5136 'title': 'Igor Kleiner - Playlists',
a6213a49 5137 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 5138 'uploader': 'Igor Kleiner',
a6213a49 5139 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5140 'channel': 'Igor Kleiner',
5141 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5142 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5143 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5144 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5145 'channel_follower_count': int
a6213a49 5146 },
5147 }, {
5148 'note': 'playlists, multipage, different order',
5149 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5150 'playlist_mincount': 94,
5151 'info_dict': {
5152 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5153 'title': 'Igor Kleiner - Playlists',
a6213a49 5154 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5155 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 5156 'uploader': 'Igor Kleiner',
5157 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5158 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5159 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5160 'channel': 'Igor Kleiner',
5161 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 5162 'channel_follower_count': int
a6213a49 5163 },
5164 }, {
5165 'note': 'playlists, series',
5166 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5167 'playlist_mincount': 5,
5168 'info_dict': {
5169 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5170 'title': '3Blue1Brown - Playlists',
5171 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5172 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5173 'uploader': '3Blue1Brown',
976ae3ea 5174 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5175 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5176 'channel': '3Blue1Brown',
5177 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5178 'tags': ['Mathematics'],
6c73052c 5179 'channel_follower_count': int
a6213a49 5180 },
5181 }, {
5182 'note': 'playlists, singlepage',
5183 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5184 'playlist_mincount': 4,
5185 'info_dict': {
5186 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5187 'title': 'ThirstForScience - Playlists',
5188 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5189 'uploader': 'ThirstForScience',
5190 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 5191 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5192 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5193 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5194 'tags': 'count:13',
5195 'channel': 'ThirstForScience',
6c73052c 5196 'channel_follower_count': int
a6213a49 5197 }
5198 }, {
5199 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5200 'only_matching': True,
5201 }, {
5202 'note': 'basic, single video playlist',
5203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5204 'info_dict': {
5205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5206 'uploader': 'Sergey M.',
5207 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5208 'title': 'youtube-dl public playlist',
976ae3ea 5209 'description': '',
5210 'tags': [],
5211 'view_count': int,
5212 'modified_date': '20201130',
5213 'channel': 'Sergey M.',
5214 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5215 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5216 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5217 'availability': 'public',
a6213a49 5218 },
5219 'playlist_count': 1,
5220 }, {
5221 'note': 'empty playlist',
5222 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5223 'info_dict': {
5224 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5225 'uploader': 'Sergey M.',
5226 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5227 'title': 'youtube-dl empty playlist',
976ae3ea 5228 'tags': [],
5229 'channel': 'Sergey M.',
5230 'description': '',
5231 'modified_date': '20160902',
5232 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5233 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5234 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
c26f9b99 5235 'availability': 'public',
a6213a49 5236 },
5237 'playlist_count': 0,
5238 }, {
5239 'note': 'Home tab',
5240 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5241 'info_dict': {
5242 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5243 'title': 'lex will - Home',
5244 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5245 'uploader': 'lex will',
5246 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5247 'channel': 'lex will',
5248 'tags': ['bible', 'history', 'prophesy'],
5249 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5250 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5251 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5252 'channel_follower_count': int
a6213a49 5253 },
5254 'playlist_mincount': 2,
5255 }, {
5256 'note': 'Videos tab',
5257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5258 'info_dict': {
5259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5260 'title': 'lex will - Videos',
5261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5262 'uploader': 'lex will',
5263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5264 'tags': ['bible', 'history', 'prophesy'],
5265 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5266 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5267 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5268 'channel': 'lex will',
6c73052c 5269 'channel_follower_count': int
a6213a49 5270 },
5271 'playlist_mincount': 975,
5272 }, {
5273 'note': 'Videos tab, sorted by popular',
5274 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5275 'info_dict': {
5276 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5277 'title': 'lex will - Videos',
5278 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5279 'uploader': 'lex will',
5280 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5281 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5282 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5283 'channel': 'lex will',
5284 'tags': ['bible', 'history', 'prophesy'],
5285 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 5286 'channel_follower_count': int
a6213a49 5287 },
5288 'playlist_mincount': 199,
5289 }, {
5290 'note': 'Playlists tab',
5291 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5292 'info_dict': {
5293 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5294 'title': 'lex will - Playlists',
5295 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5296 'uploader': 'lex will',
5297 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5298 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5299 'channel': 'lex will',
5300 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5301 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5302 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5303 'channel_follower_count': int
a6213a49 5304 },
5305 'playlist_mincount': 17,
5306 }, {
5307 'note': 'Community tab',
5308 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5309 'info_dict': {
5310 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5311 'title': 'lex will - Community',
5312 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5313 'uploader': 'lex will',
5314 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5315 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5316 'channel': 'lex will',
5317 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5318 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5319 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5320 'channel_follower_count': int
a6213a49 5321 },
5322 'playlist_mincount': 18,
5323 }, {
5324 'note': 'Channels tab',
5325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5326 'info_dict': {
5327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5328 'title': 'lex will - Channels',
5329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5330 'uploader': 'lex will',
5331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 5332 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5333 'channel': 'lex will',
5334 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5335 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5336 'tags': ['bible', 'history', 'prophesy'],
6c73052c 5337 'channel_follower_count': int
a6213a49 5338 },
5339 'playlist_mincount': 12,
5340 }, {
5341 'note': 'Search tab',
5342 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5343 'playlist_mincount': 40,
5344 'info_dict': {
5345 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5346 'title': '3Blue1Brown - Search - linear algebra',
5347 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5348 'uploader': '3Blue1Brown',
5349 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 5350 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5351 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5352 'tags': ['Mathematics'],
5353 'channel': '3Blue1Brown',
5354 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 5355 'channel_follower_count': int
a6213a49 5356 },
5357 }, {
5358 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5359 'only_matching': True,
5360 }, {
5361 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5362 'only_matching': True,
5363 }, {
5364 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5365 'only_matching': True,
5366 }, {
5367 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5368 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5369 'info_dict': {
5370 'title': '29C3: Not my department',
5371 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5372 'uploader': 'Christiaan008',
5373 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5374 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5375 'tags': [],
5376 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5377 'view_count': int,
5378 'modified_date': '20150605',
5379 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5380 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5381 'channel': 'Christiaan008',
c26f9b99 5382 'availability': 'public',
a6213a49 5383 },
5384 'playlist_count': 96,
5385 }, {
5386 'note': 'Large playlist',
5387 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5388 'info_dict': {
5389 'title': 'Uploads from Cauchemar',
5390 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5391 'uploader': 'Cauchemar',
5392 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5393 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5394 'tags': [],
5395 'modified_date': r're:\d{8}',
5396 'channel': 'Cauchemar',
5397 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5398 'view_count': int,
5399 'description': '',
5400 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
c26f9b99 5401 'availability': 'public',
a6213a49 5402 },
5403 'playlist_mincount': 1123,
976ae3ea 5404 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5405 }, {
5406 'note': 'even larger playlist, 8832 videos',
5407 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5408 'only_matching': True,
5409 }, {
5410 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5411 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5412 'info_dict': {
5413 'title': 'Uploads from Interstellar Movie',
5414 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5415 'uploader': 'Interstellar Movie',
5416 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5417 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5418 'tags': [],
5419 'view_count': int,
5420 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5421 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5422 'channel': 'Interstellar Movie',
5423 'description': '',
5424 'modified_date': r're:\d{8}',
c26f9b99 5425 'availability': 'public',
a6213a49 5426 },
5427 'playlist_mincount': 21,
5428 }, {
5429 'note': 'Playlist with "show unavailable videos" button',
5430 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5431 'info_dict': {
5432 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5433 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5434 'uploader': 'Phim Siêu Nhân Nhật Bản',
5435 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5436 'view_count': int,
5437 'channel': 'Phim Siêu Nhân Nhật Bản',
5438 'tags': [],
5439 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5440 'description': '',
5441 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5442 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5443 'modified_date': r're:\d{8}',
c26f9b99 5444 'availability': 'public',
a6213a49 5445 },
5446 'playlist_mincount': 200,
976ae3ea 5447 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5448 }, {
5449 'note': 'Playlist with unavailable videos in page 7',
5450 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5451 'info_dict': {
5452 'title': 'Uploads from BlankTV',
5453 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5454 'uploader': 'BlankTV',
5455 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5456 'channel': 'BlankTV',
5457 'channel_url': 'https://www.youtube.com/c/blanktv',
5458 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5459 'view_count': int,
5460 'tags': [],
5461 'uploader_url': 'https://www.youtube.com/c/blanktv',
5462 'modified_date': r're:\d{8}',
5463 'description': '',
c26f9b99 5464 'availability': 'public',
a6213a49 5465 },
5466 'playlist_mincount': 1000,
976ae3ea 5467 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5468 }, {
5469 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5470 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5471 'info_dict': {
5472 'title': 'Data Analysis with Dr Mike Pound',
5473 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5474 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5475 'uploader': 'Computerphile',
5476 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5477 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5478 'tags': [],
5479 'view_count': int,
5480 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5481 'channel_url': 'https://www.youtube.com/user/Computerphile',
5482 'channel': 'Computerphile',
c26f9b99 5483 'availability': 'public',
6141346d 5484 'modified_date': '20190712',
a6213a49 5485 },
5486 'playlist_mincount': 11,
5487 }, {
5488 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5489 'only_matching': True,
5490 }, {
5491 'note': 'Playlist URL that does not actually serve a playlist',
5492 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5493 'info_dict': {
5494 'id': 'FqZTN594JQw',
5495 'ext': 'webm',
5496 'title': "Smiley's People 01 detective, Adventure Series, Action",
5497 'uploader': 'STREEM',
5498 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5499 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5500 'upload_date': '20150526',
5501 'license': 'Standard YouTube License',
5502 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5503 'categories': ['People & Blogs'],
5504 'tags': list,
5505 'view_count': int,
5506 'like_count': int,
a6213a49 5507 },
5508 'params': {
5509 'skip_download': True,
5510 },
5511 'skip': 'This video is not available.',
5512 'add_ie': [YoutubeIE.ie_key()],
5513 }, {
5514 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5515 'only_matching': True,
5516 }, {
5517 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5518 'only_matching': True,
5519 }, {
5520 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5521 'info_dict': {
12a1b225 5522 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5523 'ext': 'mp4',
976ae3ea 5524 'title': str,
a6213a49 5525 'uploader': 'Sky News',
5526 'uploader_id': 'skynews',
5527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5528 'upload_date': r're:\d{8}',
976ae3ea 5529 'description': str,
a6213a49 5530 'categories': ['News & Politics'],
5531 'tags': list,
5532 'like_count': int,
86973308 5533 'release_timestamp': int,
976ae3ea 5534 'channel': 'Sky News',
5535 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5536 'age_limit': 0,
5537 'view_count': int,
86973308 5538 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
976ae3ea 5539 'playable_in_embed': True,
86973308 5540 'release_date': r're:\d+',
976ae3ea 5541 'availability': 'public',
5542 'live_status': 'is_live',
5543 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
86973308
M
5544 'channel_follower_count': int,
5545 'concurrent_view_count': int,
a6213a49 5546 },
5547 'params': {
5548 'skip_download': True,
5549 },
976ae3ea 5550 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5551 }, {
5552 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5553 'info_dict': {
5554 'id': 'a48o2S1cPoo',
5555 'ext': 'mp4',
5556 'title': 'The Young Turks - Live Main Show',
5557 'uploader': 'The Young Turks',
5558 'uploader_id': 'TheYoungTurks',
5559 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5560 'upload_date': '20150715',
5561 'license': 'Standard YouTube License',
5562 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5563 'categories': ['News & Politics'],
5564 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5565 'like_count': int,
a6213a49 5566 },
5567 'params': {
5568 'skip_download': True,
5569 },
5570 'only_matching': True,
5571 }, {
5572 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5573 'only_matching': True,
5574 }, {
5575 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5576 'only_matching': True,
5577 }, {
5578 'note': 'A channel that is not live. Should raise error',
5579 'url': 'https://www.youtube.com/user/numberphile/live',
5580 'only_matching': True,
5581 }, {
5582 'url': 'https://www.youtube.com/feed/trending',
5583 'only_matching': True,
5584 }, {
5585 'url': 'https://www.youtube.com/feed/library',
5586 'only_matching': True,
5587 }, {
5588 'url': 'https://www.youtube.com/feed/history',
5589 'only_matching': True,
5590 }, {
5591 'url': 'https://www.youtube.com/feed/subscriptions',
5592 'only_matching': True,
5593 }, {
5594 'url': 'https://www.youtube.com/feed/watch_later',
5595 'only_matching': True,
5596 }, {
5597 'note': 'Recommended - redirects to home page.',
5598 'url': 'https://www.youtube.com/feed/recommended',
5599 'only_matching': True,
5600 }, {
5601 'note': 'inline playlist with not always working continuations',
5602 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5603 'only_matching': True,
5604 }, {
5605 'url': 'https://www.youtube.com/course',
5606 'only_matching': True,
5607 }, {
5608 'url': 'https://www.youtube.com/zsecurity',
5609 'only_matching': True,
5610 }, {
5611 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5612 'only_matching': True,
5613 }, {
5614 'url': 'https://www.youtube.com/TheYoungTurks/live',
5615 'only_matching': True,
5616 }, {
5617 'url': 'https://www.youtube.com/hashtag/cctv9',
5618 'info_dict': {
5619 'id': 'cctv9',
5620 'title': '#cctv9',
976ae3ea 5621 'tags': [],
a6213a49 5622 },
4dc23a80 5623 'playlist_mincount': 300, # not consistent but should be over 300
a6213a49 5624 }, {
5625 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5626 'only_matching': True,
5627 }, {
5628 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5629 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5630 'only_matching': True
5631 }, {
5632 'note': '/browse/ should redirect to /channel/',
5633 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5634 'only_matching': True
5635 }, {
5636 'note': 'VLPL, should redirect to playlist?list=PL...',
5637 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5638 'info_dict': {
5639 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5640 'uploader': 'NoCopyrightSounds',
5641 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5642 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5643 'title': 'NCS : All Releases 💿',
976ae3ea 5644 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5645 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5646 'modified_date': r're:\d{8}',
5647 'view_count': int,
5648 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5649 'tags': [],
5650 'channel': 'NoCopyrightSounds',
c26f9b99 5651 'availability': 'public',
a6213a49 5652 },
5653 'playlist_mincount': 166,
976ae3ea 5654 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5655 }, {
5656 'note': 'Topic, should redirect to playlist?list=UU...',
5657 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5658 'info_dict': {
5659 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5660 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5661 'title': 'Uploads from Royalty Free Music - Topic',
5662 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5663 'tags': [],
5664 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5665 'channel': 'Royalty Free Music - Topic',
5666 'view_count': int,
5667 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5668 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5669 'modified_date': r're:\d{8}',
5670 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5671 'description': '',
c26f9b99 5672 'availability': 'public',
a6213a49 5673 },
a6213a49 5674 'playlist_mincount': 101,
5675 }, {
86973308
M
5676 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5677 # Treat as a general feed
a6213a49 5678 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5679 'info_dict': {
5680 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5681 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5682 'tags': [],
a6213a49 5683 },
a6213a49 5684 'playlist_mincount': 9,
5685 }, {
5686 'note': 'Youtube music Album',
5687 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5688 'info_dict': {
5689 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5690 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5691 'tags': [],
5692 'view_count': int,
5693 'description': '',
5694 'availability': 'unlisted',
5695 'modified_date': r're:\d{8}',
a6213a49 5696 },
5697 'playlist_count': 50,
5698 }, {
5699 'note': 'unlisted single video playlist',
5700 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5701 'info_dict': {
5702 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5703 'uploader': 'colethedj',
5704 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5705 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5706 'availability': 'unlisted',
5707 'tags': [],
12a1b225 5708 'modified_date': '20220418',
976ae3ea 5709 'channel': 'colethedj',
5710 'view_count': int,
5711 'description': '',
5712 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5713 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5714 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5715 },
5716 'playlist_count': 1,
5717 }, {
5718 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5719 'url': 'https://www.youtube.com/feed/recommended',
5720 'info_dict': {
5721 'id': 'recommended',
5722 'title': 'recommended',
6c73052c 5723 'tags': [],
a6213a49 5724 },
5725 'playlist_mincount': 50,
5726 'params': {
5727 'skip_download': True,
5728 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5729 },
5730 }, {
5731 'note': 'API Fallback: /videos tab, sorted by oldest first',
5732 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5733 'info_dict': {
5734 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5735 'title': 'Cody\'sLab - Videos',
5736 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5737 'uploader': 'Cody\'sLab',
5738 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5739 'channel': 'Cody\'sLab',
5740 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5741 'tags': [],
5742 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5743 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5744 'channel_follower_count': int
a6213a49 5745 },
5746 'playlist_mincount': 650,
5747 'params': {
5748 'skip_download': True,
5749 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5750 },
86973308 5751 'skip': 'Query for sorting no longer works',
a6213a49 5752 }, {
5753 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5754 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5755 'info_dict': {
5756 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5757 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5758 'title': 'Uploads from Royalty Free Music - Topic',
5759 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5760 'modified_date': r're:\d{8}',
5761 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5762 'description': '',
5763 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5764 'tags': [],
5765 'channel': 'Royalty Free Music - Topic',
5766 'view_count': int,
5767 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
c26f9b99 5768 'availability': 'public',
a6213a49 5769 },
a6213a49 5770 'playlist_mincount': 101,
5771 'params': {
5772 'skip_download': True,
5773 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5774 },
7c219ea6 5775 }, {
5776 'note': 'non-standard redirect to regional channel',
5777 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5778 'only_matching': True
61d3665d 5779 }, {
5780 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5781 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5782 'info_dict': {
5783 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5784 'modified_date': '20220407',
5785 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5786 'tags': [],
5787 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5788 'uploader': 'pukkandan',
5789 'availability': 'unlisted',
5790 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5791 'channel': 'pukkandan',
5792 'description': 'Test for collaborative playlist',
5793 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5794 'view_count': int,
61d3665d 5795 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5796 },
5797 'playlist_mincount': 2
c26f9b99 5798 }, {
5799 'note': 'translated tab name',
5800 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5801 'info_dict': {
5802 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5803 'tags': [],
5804 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5805 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
4dc23a80 5806 'description': 'test description',
c26f9b99 5807 'title': 'cole-dlp-test-acc - 再生リスト',
5808 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5809 'uploader': 'cole-dlp-test-acc',
5810 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5811 'channel': 'cole-dlp-test-acc',
5812 },
5813 'playlist_mincount': 1,
5814 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5815 'expected_warnings': ['Preferring "ja"'],
5816 }, {
5817 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5818 'note': 'preferred lang set with playlist with translated video titles',
5819 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5820 'info_dict': {
5821 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5822 'tags': [],
5823 'view_count': int,
5824 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5825 'uploader': 'cole-dlp-test-acc',
5826 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5827 'channel': 'cole-dlp-test-acc',
5828 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5829 'description': 'test',
5830 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5831 'title': 'dlp test playlist',
5832 'availability': 'public',
5833 },
5834 'playlist_mincount': 1,
5835 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5836 'expected_warnings': ['Preferring "ja"'],
80eb0bd9 5837 }, {
5838 # shorts audio pivot for 2GtVksBMYFM.
5839 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5840 'info_dict': {
5841 'id': 'sfv_audio_pivot',
5842 'title': 'sfv_audio_pivot',
5843 'tags': [],
5844 },
5845 'playlist_mincount': 50,
5846
86973308
M
5847 }, {
5848 # Channel with a real live tab (not to be mistaken with streams tab)
5849 # Do not treat like it should redirect to live stream
5850 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5851 'info_dict': {
5852 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5853 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5854 'tags': [],
5855 },
5856 'playlist_mincount': 20,
5857 }, {
5858 # Tab name is not the same as tab id
5859 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5860 'info_dict': {
5861 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5862 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5863 'tags': [],
5864 },
5865 'playlist_mincount': 8,
5866 }, {
5867 # Home tab id is literally home. Not to get mistaken with featured
5868 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5869 'info_dict': {
5870 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5871 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5872 'tags': [],
5873 },
5874 'playlist_mincount': 8,
5875 }, {
5876 # Should get three playlists for videos, shorts and streams tabs
5877 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5878 'info_dict': {
5879 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
bd7e919a 5880 'title': 'Polka Ch. 尾丸ポルカ',
5881 'channel_follower_count': int,
5882 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5883 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5884 'uploader': 'Polka Ch. 尾丸ポルカ',
5885 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5886 'channel': 'Polka Ch. 尾丸ポルカ',
5887 'tags': 'count:35',
5888 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5889 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
86973308
M
5890 },
5891 'playlist_count': 3,
5892 }, {
5893 # Shorts tab with channel with handle
5894 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5895 'info_dict': {
5896 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5897 'title': 'Not Just Bikes - Shorts',
5898 'tags': 'count:12',
5899 'uploader': 'Not Just Bikes',
5900 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5901 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5902 'channel_follower_count': int,
5903 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5904 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5905 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5906 'channel': 'Not Just Bikes',
5907 },
5908 'playlist_mincount': 10,
5909 }, {
5910 # Streams tab
5911 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5912 'info_dict': {
5913 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5914 'title': '中村悠一 - Live',
5915 'tags': 'count:7',
5916 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5917 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5918 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5919 'channel': '中村悠一',
5920 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5921 'channel_follower_count': int,
5922 'uploader': '中村悠一',
5923 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5924 },
5925 'playlist_mincount': 60,
5926 }, {
5927 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5928 # See test_youtube_lists
5929 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5930 'only_matching': True,
5931 }, {
5932 # No uploads and no UCID given. Should fail with no uploads error
5933 # See test_youtube_lists
5934 'url': 'https://www.youtube.com/news',
5935 'only_matching': True
5936 }, {
5937 # No videos tab but has a shorts tab
5938 'url': 'https://www.youtube.com/c/TKFShorts',
5939 'info_dict': {
5940 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5941 'title': 'Shorts Break - Shorts',
5942 'tags': 'count:32',
5943 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5944 'channel': 'Shorts Break',
5945 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5946 'uploader': 'Shorts Break',
5947 'channel_follower_count': int,
5948 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5949 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5950 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5951 },
5952 'playlist_mincount': 30,
5953 }, {
5954 # Trending Now Tab. tab id is empty
5955 'url': 'https://www.youtube.com/feed/trending',
5956 'info_dict': {
5957 'id': 'trending',
5958 'title': 'trending - Now',
5959 'tags': [],
5960 },
5961 'playlist_mincount': 30,
5962 }, {
5963 # Trending Gaming Tab. tab id is empty
5964 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5965 'info_dict': {
5966 'id': 'trending',
5967 'title': 'trending - Gaming',
5968 'tags': [],
5969 },
5970 'playlist_mincount': 30,
4dc23a80
M
5971 }, {
5972 # Shorts url result in shorts tab
5973 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
5974 'info_dict': {
5975 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5976 'title': 'cole-dlp-test-acc - Shorts',
5977 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5978 'channel': 'cole-dlp-test-acc',
4dc23a80
M
5979 'description': 'test description',
5980 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5981 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5982 'tags': [],
5983 'uploader': 'cole-dlp-test-acc',
5984 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5985
5986 },
5987 'playlist': [{
5988 'info_dict': {
5989 '_type': 'url',
5990 'ie_key': 'Youtube',
5991 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
5992 'id': 'sSM9J5YH_60',
5993 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5994 'title': 'SHORT short',
5995 'channel': 'cole-dlp-test-acc',
5996 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5997 'view_count': int,
5998 'thumbnails': list,
5999 }
6000 }],
6001 'params': {'extract_flat': True},
6002 }, {
6003 # Live video status should be extracted
6004 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6005 'info_dict': {
6006 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6007 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6008 'tags': []
6009 },
6010 'playlist': [{
6011 'info_dict': {
6012 '_type': 'url',
6013 'ie_key': 'Youtube',
6014 'url': 'startswith:https://www.youtube.com/watch?v=',
6015 'id': str,
6016 'title': str,
6017 'live_status': 'is_live',
6018 'channel_id': str,
6019 'channel_url': str,
6020 'concurrent_view_count': int,
6021 'channel': str,
6022 }
6023 }],
c7335551 6024 'params': {'extract_flat': True, 'playlist_items': '1'},
4dc23a80 6025 'playlist_mincount': 1
c7335551
M
6026 }, {
6027 # Channel renderer metadata. Contains number of videos on the channel
6028 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6029 'info_dict': {
6030 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6031 'title': 'cole-dlp-test-acc - Channels',
6032 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6033 'channel': 'cole-dlp-test-acc',
6034 'description': 'test description',
6035 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6036 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6037 'tags': [],
6038 'uploader': 'cole-dlp-test-acc',
6039 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6040
6041 },
6042 'playlist': [{
6043 'info_dict': {
6044 '_type': 'url',
6045 'ie_key': 'YoutubeTab',
6046 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6047 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6048 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6049 'title': 'PewDiePie',
6050 'channel': 'PewDiePie',
6051 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6052 'thumbnails': list,
6053 'channel_follower_count': int,
6054 'playlist_count': int
6055 }
6056 }],
6057 'params': {'extract_flat': True},
a6213a49 6058 }]
6059
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the parent class check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 6063
86973308
M
# Splits a URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - the next path segment ("/<name>"); only attempted when the
#          `not_channel` group did not take part in the match
#          (`not_channel` is expected to be a group defined inside _VALID_URL)
#   post - whatever remains of the URL
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6065
6066 def _get_url_mobj(self, url):
6067 mobj = self._URL_RE.match(url).groupdict()
6068 mobj.update((k, '') for k, v in mobj.items() if v is None)
6069 return mobj
6070
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    The id is taken from the tab's endpoint URL (resolved against *base_url*),
    then from ``tabIdentifier``, and finally from the lower-cased tab title.
    ``tab_name`` is always the lower-cased title (``''`` when there is none).
    """
    title = tab.get('title')
    tab_name = title.lower() if title else ''

    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # Drop the leading "/" of the matched tab path segment
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # No id could be found, so the tab name has to stand in for it.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')

    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6092
6093 def _has_tab(self, tabs, tab_id):
6094 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
fe03a6cd 6095
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist URL.

    Depending on the URL and the downloaded page data this returns one of:
      * a ``url_result`` pointing at another URL (music redirects, a watch
        URL that only carries ``list=``, a declined playlist, a conditional
        regional redirect, or the single-video fallback at the end);
      * the single entry built by ``_extract_from_tabs``;
      * a ``playlist_result`` grouping the main tab with the extra
        ``/streams`` and ``/shorts`` tabs;
      * the result of ``_extract_from_playlist`` for an inline playlist.

    ``smuggled_data`` is supplied by the ``passthrough_smuggled_data``
    decorator (defined outside this block; presumably the data smuggled
    into the URL - confirm there). Only its ``is_music_url`` key is read here.

    Raises ExtractorError for an unresolvable album, a watch URL with neither
    video nor playlist id, a channel without uploads, a missing requested tab,
    or an unrecognized page; raises UserNotLive when ``/live`` was requested
    but a different tab ended up selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, whatever host the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # `not_channel` is '' (falsy) when that group did not match, so the URL is treated as a channel
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading "/", the tab id does not
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel URL is fetched through its /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When the playlist is declined, hand the video over to YoutubeIE instead
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs to extract alongside the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            # No tab was requested: gather every uploads tab of the channel
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # Continue as if the uploads playlist had been requested
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # The page data is discarded, so only `extra_tabs` yield entries below
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tab renderers: `data` may have been replaced (or set to None) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        # Each extra tab is extracted by calling this method again on its URL
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        # Placeholder text, used in the message when no UC channel id is available
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video id from the page data, else the one from the URL query
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 6245
c5e8d7af 6246
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as any youtube/invidious URL carrying a
    # `list=` parameter, and forwards the playlist to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs claimed by YoutubeTabIE are left to it
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-scope import looks deliberate (it keeps this
        # method self-contained) - confirm before hoisting it to module level
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        # A URL that names a video (`v=`) is not treated as a playlist URL here
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Must be checked on the original URL, before it is rebuilt below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query string; fall back to just the playlist id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 6359
6360
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a `list=` parameter; rewritten to the
    # equivalent youtube.com/watch URL and handed to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist id, not the video id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 6410
6411
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Turns /embed/live_stream?channel=<id> into the channel's /live URL
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6425
6426
class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand to the /user/<name> page
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
9558dcec 6439
b05654f0 6440
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # The ":ytfav" keyword (and its spelling variants) maps to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6458
6459
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Lists the entries of the notification menu, following continuation
    # tokens page by page.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        ``continuation_list[0]`` is used as an out-parameter: it is reset to
        None and then set to the last continuation renderer found, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url``-type entry from a notification renderer.

        Returns None when the notification points neither at a video nor at
        a channel post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The timestamp is only filled in when `approximate_date` is configured
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6549
6550
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" keyword; the search parameter below limits results to videos
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
b05654f0 6564
a61fd4cf 6565
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" keyword; same as the plain search but sorted by date
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
75dff0ee 6579
c9ae7b95 6580
a6213a49 6581class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
96565c7e 6582 IE_DESC = 'YouTube search URLs with sorting and filter support'
386e1dd9 6583 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
182bda88 6584 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
3462ffa8 6585 _TESTS = [{
6586 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
6587 'playlist_mincount': 5,
6588 'info_dict': {
11f9be09 6589 'id': 'youtube-dl test video',
3462ffa8 6590 'title': 'youtube-dl test video',
6591 }
a61fd4cf 6592 }, {
6593 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
6594 'playlist_mincount': 5,
6595 'info_dict': {
6596 'id': 'python',
6597 'title': 'python',
6598 }
ad210f4f 6599 }, {
6600 'url': 'https://www.youtube.com/results?search_query=%23cats',
6601 'playlist_mincount': 1,
6602 'info_dict': {
6603 'id': '#cats',
6604 'title': '#cats',
12a1b225
A
6605 # The test suite does not have support for nested playlists
6606 # 'entries': [{
6607 # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
6608 # 'title': '#cats',
6609 # }],
ad210f4f 6610 },
c7335551
M
6611 }, {
6612 # Channel results
6613 'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
6614 'info_dict': {
6615 'id': 'kurzgesagt',
6616 'title': 'kurzgesagt',
6617 },
6618 'playlist': [{
6619 'info_dict': {
6620 '_type': 'url',
6621 'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
6622 'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
6623 'ie_key': 'YoutubeTab',
6624 'channel': 'Kurzgesagt – In a Nutshell',
6625 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
6626 'title': 'Kurzgesagt – In a Nutshell',
6627 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
6628 'playlist_count': int, # XXX: should have a way of saying > 1
6629 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
6630 'thumbnails': list
6631 }
6632 }],
6633 'params': {'extract_flat': True, 'playlist_items': '1'},
6634 'playlist_mincount': 1,
3462ffa8 6635 }, {
6636 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
6637 'only_matching': True,
6638 }]
6639
def _real_extract(self, url):
    """Turn a search results URL into a playlist of its results.

    The search terms come from the ``search_query`` query parameter, or
    from ``q`` when that one is absent/empty, and are used as both the
    playlist id and title. The first ``sp`` value (``None`` if the URL
    has none) is forwarded to ``_search_results``.
    """
    url_params = parse_qs(url)
    search_terms = url_params.get('search_query') or url_params.get('q')
    query = search_terms[0]
    sp_values = url_params.get('sp', (None,))
    entries = self._search_results(query, sp_values[0])
    return self.playlist_result(entries, query, query)
3462ffa8 6644
6645
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as it may appear in the URL fragment) -> `sp` value for that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Build a playlist from a YouTube Music search URL.

        The section is selected by the ``sp`` query parameter when present,
        otherwise by the URL fragment (e.g. ``#songs``). The playlist id and
        title are the query, suffixed with `` - <section>`` when a section
        was determined.
        """
        url_params = parse_qs(url)
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        params = url_params.get('sp', (None,))[0]

        if not params:
            # No explicit `sp`: interpret the fragment as a section name
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown (or missing) section: search without one
                section = None
        else:
            # Prefer the readable section name; fall back to the raw `sp` value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 6697
6698
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Subclasses select their feed by overriding _FEED_NAME; both the
    extractor name and the feed URL handed over to YoutubeTabIE are
    derived from it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        # This class derives from plain InfoExtractor, so the check is
        # borrowed explicitly from YoutubeBaseInfoExtractor
        # (presumably it consults _LOGIN_REQUIRED -- verify in that class)
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed's canonical URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6717
6718
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Redirect the ":ytwatchlater" keyword to the "WL" playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6731
6732
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    # Matches the bare youtube.com homepage as well as the keyword forms
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 6748
1ed5b5c9 6749
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 6761
1ed5b5c9 6762
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    # Accepts both ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
6771
6772
class YoutubeStoriesIE(InfoExtractor):
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The `id` group captures the channel ID *without* its leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Hand the channel's "RLTD..." playlist to YoutubeTabIE, flagged as a story."""
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6787
6788
class YoutubeShortsAudioPivotIE(InfoExtractor):
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True},
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id

        The id is embedded three times into a fixed binary template, which
        is then base64-encoded and percent-quoted for use in a URL.
        """
        raw_id = video_id.encode()
        # NOTE(review): the template looks like a serialized protobuf message
        # (cf. the `pb_` name), with \x0b (11) matching the 11-character ids
        # accepted by _VALID_URL -- confirm before changing it
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Redirect to the sfv_audio_pivot feed for the matched video id."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6811
6812
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id (typically to an unquoted `&`).

    This extractor never extracts anything; it only raises a helpful error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a watch URL that ends right after a single non-`v` parameter
    # (or has no parameters at all), or an attribution_link with only `a=`
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint on how to quote the URL.

        Raises:
            ExtractorError: unconditionally; marked as expected so that it
                is reported as a usage problem rather than a bug.
        """
        # The suggested command must name this program (yt-dlp), not its
        # predecessor, or the hint cannot be copy-pasted
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
772fd5cc
PH
6860
6861
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus the clipped section.

        Returns a ``url_transparent`` result pointing at the base video's
        watch URL, carrying the clip id and ``section_start``/``section_end``
        in seconds.

        Raises:
            ExtractorError: if the base video id or the clip's start/end
                times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False, expected_type=dict)
        # Without this guard a changed page layout surfaces as a raw
        # TypeError/KeyError from the subscripts below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # The page data gives milliseconds; sections are in seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
3cd786db 6918
6919
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Only watch URLs whose `v` value is 1 to 10 characters long and ends the URL
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete video id as an expected error."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)