]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cbs] Add support for ParamountPlus (#138)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c5e8d7af 28 clean_html,
c5e8d7af 29 ExtractorError,
b60419c5 30 format_field,
2d30521a 31 float_or_none,
dd27fd17 32 int_or_none,
94278f72 33 mimetype2ext,
6310acf5 34 parse_codecs,
7c80519c 35 parse_duration,
dca3ff4a 36 qualities,
3995d37d 37 remove_start,
cf7e015f 38 smuggle_url,
dbdaaa23 39 str_or_none,
c93d53f5 40 str_to_int,
556dbe7f 41 try_get,
c5e8d7af
PH
42 unescapeHTML,
43 unified_strdate,
cf7e015f 44 unsmuggle_url,
8bdd16b4 45 update_url_query,
21c340b8 46 url_or_none,
6e6bc8da 47 urlencode_postdata,
8bdd16b4 48 urljoin,
c5e8d7af
PH
49)
50
5f6a1245 51
de7f3446 52class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
53 """Provide base functions for Youtube extractors"""
# --- Google account sign-in endpoints -------------------------------------
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is filled in with the TL token taken from the challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# --- Credentials ----------------------------------------------------------
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# --- URL building blocks (regex alternations) -----------------------------
# Path names listed as reserved; the three fragments are joined into a single
# regex alternation.
_RESERVED_NAMES = (
    r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
    r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

# Either a known prefix followed by at least 10 ID characters, or one of the
# fixed special list names
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 71
25f14e9f
S
def _ids_to_results(self, ids):
    """Return a list with one 'Youtube' url_result entry per video ID in *ids*.

    Each ID is passed to ``self.url_result`` both as the URL and as the
    ``video_id`` keyword; the order of *ids* is preserved.
    """
    results = []
    for video_id in ids:
        results.append(self.url_result(video_id, 'Youtube', video_id=video_id))
    return results
76
def _login(self):
    """
    Attempt to log in to YouTube.

    True is returned if login succeeded or was skipped (no username available).
    False is returned if any step of the login flow failed.

    If _LOGIN_REQUIRED is set and neither login info nor a cookie file was
    provided, an ExtractorError is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # TODO: remind cookie-file users to always use up to date cookies
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (not a bare return) to honour the documented contract
        return False

    login_form = self._hidden_inputs(login_page)

    # Identical "continue" URL embedded in both the lookup and challenge requests
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    def req(url, f_req, note, errnote):
        """POST *f_req* plus the hidden login-form fields to *url*.

        The download is non-fatal; the code below treats a result of
        ``False`` as a failed request.
        """
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything that precedes the first '[' before JSON parsing
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised so the prefix is kept for every message: '%' binds
        # tighter than the conditional expression.
        warn('Unable to login: %s' % (
            'Invalid password'
            if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password message above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code'
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login is only considered successful if the CheckCookie page
    # contains the account URL.
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
261
def _download_webpage_handle(self, *args, **kwargs):
    """Forward to the parent implementation with a private copy of ``query``.

    The ``query`` keyword is replaced by a copy, so the mapping handed to
    the parent class is never the caller's own object. A missing or
    explicitly ``None`` query becomes an empty dict (previously ``None``
    raised AttributeError on ``.copy()``).
    NOTE(review): assumes the parent treats an empty query like no query --
    confirm against the parent implementation.
    """
    query = (kwargs.get('query') or {}).copy()
    kwargs['query'] = query
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
267
b2e8bc1b
JMF
def _real_initialize(self):
    """Run the login step once a downloader is attached.

    Nothing happens when ``self._downloader`` is None. The outcome of
    ``self._login()`` is not acted upon here; the method always returns None.
    """
    if self._downloader is not None:
        self._login()
c5e8d7af 273
# Base request body; _call_api copies it and merges the per-call query on top.
_DEFAULT_API_DATA = {
    'context': {
        'client': {'clientName': 'WEB', 'clientVersion': '2.20210301.08.00'},
    },
}

# Group 1 captures the JSON object assigned to ytInitialData
# (either the bare name or the window["ytInitialData"] form).
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Group 1 captures the JSON object assigned to ytInitialPlayerResponse.
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text that may follow one of the assignments above; appended to the data
# regex by _extract_yt_initial_data so the lazy match stops at this boundary.
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 286
a5c56234
M
def _generate_sapisidhash_header(self):
    """Build a 'SAPISIDHASH <timestamp>_<sha1>' value from the SAPISID cookie.

    The SHA-1 is taken over '<timestamp> <cookie value> <origin>', where the
    origin is https://www.youtube.com and the timestamp is the current time
    rounded to whole seconds. Returns None when the cookies obtained for
    that origin contain no SAPISID entry.
    """
    origin = 'https://www.youtube.com'
    cookie = self._get_cookies(origin).get('SAPISID')
    if cookie is None:
        return None
    timestamp = round(time.time())
    payload = ' '.join((str(timestamp), cookie.value, origin))
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH %s_%s' % (timestamp, digest)
294
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page'):
    """POST a JSON request to the youtubei/v1 endpoint *ep* and return the result.

    ep -- endpoint name appended to https://www.youtube.com/youtubei/v1/
    query -- mapping merged over a shallow copy of _DEFAULT_API_DATA to form
             the JSON request body
    video_id, fatal, note, errnote -- passed through to self._download_json
    headers -- optional extra HTTP headers; the caller's mapping is copied,
               never modified

    'content-type: application/json' is always set. When a SAPISIDHASH value
    can be generated, 'Authorization' and 'X-Origin' headers are added too.
    Returns whatever self._download_json returns.
    """
    data = self._DEFAULT_API_DATA.copy()
    data.update(query)

    # Copy instead of updating in place so a headers dict reused by the
    # caller does not silently accumulate our additions.
    request_headers = dict(headers or {})
    request_headers['content-type'] = 'application/json'
    auth = self._generate_sapisidhash_header()
    if auth is not None:
        request_headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})

    return self._download_json(
        'https://www.youtube.com/youtubei/v1/%s' % ep,
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=request_headers,
        query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 310
def _extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON in *webpage* and return it parsed.

    Two patterns are handed to self._search_regex: first the data regex
    followed by the boundary regex, then the bare data regex. The captured
    text is parsed with self._parse_json using *video_id*.
    """
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 317
def _extract_ytcfg(self, video_id, webpage):
    """Return the parsed argument of the first ``ytcfg.set({...});`` call in *webpage*.

    When no such call is found, the literal '{}' is parsed instead. Parsing
    is non-fatal (``fatal=False`` is passed to self._parse_json).
    """
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False)
323
def _extract_video(self, renderer):
    """Turn a video renderer dict into a url_transparent result for YoutubeIE.

    Title, description, duration, view count and uploader are looked up
    leniently via try_get, so a field absent from *renderer* comes out as
    None. The video ID is used as both 'id' and 'url'.
    """
    video_id = renderer.get('videoId')

    def text_at(getter):
        # Shared lookup: value must be a compat_str, otherwise None
        return try_get(renderer, getter, compat_str)

    title = text_at((
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText']))
    description = text_at(lambda x: x['descriptionSnippet']['runs'][0]['text'])
    duration = parse_duration(text_at(lambda x: x['lengthText']['simpleText']))

    # Strip all whitespace, then take the leading run of digits and commas
    views_text = text_at(lambda x: x['viewCountText']['simpleText']) or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    view_count = str_to_int(leading_number)

    uploader = text_at((
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text']))

    return {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
355
0c148415 356
360e1ca5 357class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 358 IE_DESC = 'YouTube.com'
bc2ca1bb 359 _INVIDIOUS_SITES = (
360 # invidious-redirect websites
361 r'(?:www\.)?redirect\.invidious\.io',
362 r'(?:(?:www|dev)\.)?invidio\.us',
363 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
364 r'(?:www\.)?invidious\.pussthecat\.org',
365 r'(?:www\.)?invidious\.048596\.xyz',
366 r'(?:www\.)?invidious\.zee\.li',
367 r'(?:www\.)?vid\.puffyan\.us',
368 r'(?:(?:www|au)\.)?ytprivate\.com',
369 r'(?:www\.)?invidious\.namazso\.eu',
370 r'(?:www\.)?invidious\.ethibox\.fr',
371 r'(?:www\.)?inv\.skyn3t\.in',
372 r'(?:www\.)?invidious\.himiko\.cloud',
373 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
374 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
375 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
376 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
377 # youtube-dl invidious instances list
378 r'(?:(?:www|no)\.)?invidiou\.sh',
379 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
380 r'(?:www\.)?invidious\.kabi\.tk',
381 r'(?:www\.)?invidious\.13ad\.de',
382 r'(?:www\.)?invidious\.mastodon\.host',
383 r'(?:www\.)?invidious\.zapashcanon\.fr',
384 r'(?:www\.)?invidious\.kavin\.rocks',
385 r'(?:www\.)?invidious\.tube',
386 r'(?:www\.)?invidiou\.site',
387 r'(?:www\.)?invidious\.site',
388 r'(?:www\.)?invidious\.xyz',
389 r'(?:www\.)?invidious\.nixnet\.xyz',
390 r'(?:www\.)?invidious\.drycat\.fr',
391 r'(?:www\.)?tube\.poal\.co',
392 r'(?:www\.)?tube\.connect\.cafe',
393 r'(?:www\.)?vid\.wxzm\.sx',
394 r'(?:www\.)?vid\.mint\.lgbt',
395 r'(?:www\.)?yewtu\.be',
396 r'(?:www\.)?yt\.elukerio\.org',
397 r'(?:www\.)?yt\.lelux\.fi',
398 r'(?:www\.)?invidious\.ggc-project\.de',
399 r'(?:www\.)?yt\.maisputain\.ovh',
400 r'(?:www\.)?invidious\.toot\.koeln',
401 r'(?:www\.)?invidious\.fdn\.fr',
402 r'(?:www\.)?watch\.nettohikari\.com',
403 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
404 r'(?:www\.)?qklhadlycap4cnod\.onion',
405 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
406 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
407 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
408 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
409 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
410 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
411 )
cb7dfeea 412 _VALID_URL = r"""(?x)^
c5e8d7af 413 (
edb53e2d 414 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 415 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
416 (?:www\.)?deturl\.com/www\.youtube\.com|
417 (?:www\.)?pwnyoutube\.com|
418 (?:www\.)?hooktube\.com|
419 (?:www\.)?yourepeat\.com|
420 tube\.majestyc\.net|
421 %(invidious)s|
422 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
423 (?:.*?\#/)? # handle anchor (#/) redirect urls
424 (?: # the various things that can precede the ID:
ac7553d0 425 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 426 |(?: # or the v= param in all its forms
f7000f3a 427 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 428 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 429 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
430 v=
431 )
f4b05232 432 ))
cbaed4bb
S
433 |(?:
434 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
435 vid\.plus| # or vid.plus/xxxx
436 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 437 %(invidious)s
cbaed4bb 438 )/
edb53e2d 439 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 440 )
c5e8d7af 441 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 442 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
443 (?!.*?\blist=
444 (?:
445 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
446 WL # WL are handled by the watch later IE
447 )
448 )
c5e8d7af 449 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 450 $""" % {
451 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
452 'invidious': '|'.join(_INVIDIOUS_SITES),
453 }
e40c758c 454 _PLAYER_INFO_RE = (
cc2db878 455 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
456 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 457 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 458 )
2c62dc26 459 _formats = {
c2d3cb4c 460 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
462 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
463 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
464 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
465 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
466 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 468 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 469 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
470 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
472 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
474 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 475 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 476 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
477 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 478
479
480 # 3D videos
c2d3cb4c 481 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
483 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
484 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 485 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
486 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
487 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 488
96fb5605 489 # Apple HTTP Live Streaming
11f12195 490 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 491 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
492 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
494 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
495 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 496 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
497 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
498
499 # DASH mp4 video
d23028a8
S
500 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
504 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 505 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
506 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
509 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
511 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 512
f6f1fc92 513 # Dash mp4 audio
d23028a8
S
514 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
515 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
516 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
517 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
519 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
520 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
521
522 # Dash webm
d23028a8
S
523 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
529 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
530 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
537 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 538 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
539 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
540 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
543 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
544 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
545
546 # Dash webm audio
d23028a8
S
547 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
548 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 549
0857baad 550 # Dash webm audio with opus inside
d23028a8
S
551 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
552 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
553 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 554
ce6b9a2d
PH
555 # RTMP (unnamed)
556 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
557
558 # av01 video only formats sometimes served with "unknown" codecs
559 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
562 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 563 }
29f7c58a 564 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 565
fd5c4aab
S
566 _GEO_BYPASS = False
567
78caa52a 568 IE_NAME = 'youtube'
2eb88d95
PH
569 _TESTS = [
570 {
2d3d2997 571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
3867038a 575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
579 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
580 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 581 'upload_date': '20121002',
3867038a 582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 583 'categories': ['Science & Technology'],
3867038a 584 'tags': ['youtube-dl'],
556dbe7f 585 'duration': 10,
dbdaaa23 586 'view_count': int,
3e7c1224
PH
587 'like_count': int,
588 'dislike_count': int,
7c80519c 589 'start_time': 1,
297a564b 590 'end_time': 9,
2eb88d95 591 }
0e853ca4 592 },
fccd3771 593 {
4bc3a23e
PH
594 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
595 'note': 'Embed-only video (#1746)',
596 'info_dict': {
597 'id': 'yZIXLfi8CZQ',
598 'ext': 'mp4',
599 'upload_date': '20120608',
600 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
601 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
602 'uploader': 'SET India',
94bfcd23 603 'uploader_id': 'setindia',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 605 'age_limit': 18,
545cc85d 606 },
607 'skip': 'Private video',
fccd3771 608 },
11b56058 609 {
8bdd16b4 610 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
611 'note': 'Use the first video ID in the URL',
612 'info_dict': {
613 'id': 'BaW_jenozKc',
614 'ext': 'mp4',
3867038a 615 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
616 'uploader': 'Philipp Hagemeister',
617 'uploader_id': 'phihag',
ec85ded8 618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 619 'upload_date': '20121002',
3867038a 620 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 621 'categories': ['Science & Technology'],
3867038a 622 'tags': ['youtube-dl'],
556dbe7f 623 'duration': 10,
dbdaaa23 624 'view_count': int,
11b56058
PM
625 'like_count': int,
626 'dislike_count': int,
34a7de29
S
627 },
628 'params': {
629 'skip_download': True,
630 },
11b56058 631 },
dd27fd17 632 {
2d3d2997 633 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
634 'note': '256k DASH audio (format 141) via DASH manifest',
635 'info_dict': {
636 'id': 'a9LDPn-MO4I',
637 'ext': 'm4a',
638 'upload_date': '20121002',
639 'uploader_id': '8KVIDEO',
ec85ded8 640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
641 'description': '',
642 'uploader': '8KVIDEO',
643 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 644 },
4bc3a23e
PH
645 'params': {
646 'youtube_include_dash_manifest': True,
647 'format': '141',
4919603f 648 },
de3c7fe0 649 'skip': 'format 141 not served anymore',
dd27fd17 650 },
8bdd16b4 651 # DASH manifest with encrypted signature
652 {
653 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
654 'info_dict': {
655 'id': 'IB3lcPjvWLA',
656 'ext': 'm4a',
657 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
658 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
659 'duration': 244,
660 'uploader': 'AfrojackVEVO',
661 'uploader_id': 'AfrojackVEVO',
662 'upload_date': '20131011',
cc2db878 663 'abr': 129.495,
8bdd16b4 664 },
665 'params': {
666 'youtube_include_dash_manifest': True,
667 'format': '141/bestaudio[ext=m4a]',
668 },
669 },
aa79ac0c
PH
670 # Controversy video
671 {
672 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
673 'info_dict': {
674 'id': 'T4XJQO3qol8',
675 'ext': 'mp4',
556dbe7f 676 'duration': 219,
aa79ac0c 677 'upload_date': '20100909',
4fe54c12 678 'uploader': 'Amazing Atheist',
aa79ac0c 679 'uploader_id': 'TheAmazingAtheist',
ec85ded8 680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 681 'title': 'Burning Everyone\'s Koran',
545cc85d 682 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 683 }
c522adb1 684 },
dd2d55f1 685 # Normal age-gate video (embed allowed)
c522adb1 686 {
2d3d2997 687 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
688 'info_dict': {
689 'id': 'HtVdAasjOgU',
690 'ext': 'mp4',
691 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 692 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 693 'duration': 142,
c522adb1
JMF
694 'uploader': 'The Witcher',
695 'uploader_id': 'WitcherGame',
ec85ded8 696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 697 'upload_date': '20140605',
34952f09 698 'age_limit': 18,
c522adb1
JMF
699 },
700 },
8bdd16b4 701 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
702 # YouTube Red ad is not captured for creator
703 {
704 'url': '__2ABJjxzNo',
705 'info_dict': {
706 'id': '__2ABJjxzNo',
707 'ext': 'mp4',
708 'duration': 266,
709 'upload_date': '20100430',
710 'uploader_id': 'deadmau5',
711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 712 'creator': 'deadmau5',
713 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 714 'uploader': 'deadmau5',
715 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 716 'alt_title': 'Some Chords',
8bdd16b4 717 },
718 'expected_warnings': [
719 'DASH manifest missing',
720 ]
721 },
067aa17e 722 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
723 {
724 'url': 'lqQg6PlCWgI',
725 'info_dict': {
726 'id': 'lqQg6PlCWgI',
727 'ext': 'mp4',
556dbe7f 728 'duration': 6085,
90227264 729 'upload_date': '20150827',
cbe2bd91 730 'uploader_id': 'olympic',
ec85ded8 731 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 732 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 733 'uploader': 'Olympic',
cbe2bd91
PH
734 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
735 },
736 'params': {
737 'skip_download': 'requires avconv',
e52a40ab 738 }
cbe2bd91 739 },
6271f1ca
PH
740 # Non-square pixels
741 {
742 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
743 'info_dict': {
744 'id': '_b-2C3KPAM0',
745 'ext': 'mp4',
746 'stretched_ratio': 16 / 9.,
556dbe7f 747 'duration': 85,
6271f1ca
PH
748 'upload_date': '20110310',
749 'uploader_id': 'AllenMeow',
ec85ded8 750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 751 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 752 'uploader': '孫ᄋᄅ',
6271f1ca
PH
753 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
754 },
06b491eb
S
755 },
756 # url_encoded_fmt_stream_map is empty string
757 {
758 'url': 'qEJwOuvDf7I',
759 'info_dict': {
760 'id': 'qEJwOuvDf7I',
f57b7835 761 'ext': 'webm',
06b491eb
S
762 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
763 'description': '',
764 'upload_date': '20150404',
765 'uploader_id': 'spbelect',
766 'uploader': 'Наблюдатели Петербурга',
767 },
768 'params': {
769 'skip_download': 'requires avconv',
e323cf3f
S
770 },
771 'skip': 'This live event has ended.',
06b491eb 772 },
067aa17e 773 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
774 {
775 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
776 'info_dict': {
777 'id': 'FIl7x6_3R5Y',
eb6793ba 778 'ext': 'webm',
da77d856
S
779 'title': 'md5:7b81415841e02ecd4313668cde88737a',
780 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 781 'duration': 220,
da77d856
S
782 'upload_date': '20150625',
783 'uploader_id': 'dorappi2000',
ec85ded8 784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 785 'uploader': 'dorappi2000',
eb6793ba 786 'formats': 'mincount:31',
da77d856 787 },
eb6793ba 788 'skip': 'not actual anymore',
2ee8f5d8 789 },
8a1a26ce
YCH
790 # DASH manifest with segment_list
791 {
792 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
793 'md5': '8ce563a1d667b599d21064e982ab9e31',
794 'info_dict': {
795 'id': 'CsmdDsKjzN8',
796 'ext': 'mp4',
17ee98e1 797 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
798 'uploader': 'Airtek',
799 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
800 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
801 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
802 },
803 'params': {
804 'youtube_include_dash_manifest': True,
805 'format': '135', # bestvideo
be49068d
S
806 },
807 'skip': 'This live event has ended.',
2ee8f5d8 808 },
cf7e015f
S
809 {
810 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 811 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 812 'info_dict': {
545cc85d 813 'id': 'jvGDaLqkpTg',
814 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
815 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
816 },
817 'playlist': [{
818 'info_dict': {
545cc85d 819 'id': 'jvGDaLqkpTg',
cf7e015f 820 'ext': 'mp4',
545cc85d 821 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
822 'description': 'md5:e03b909557865076822aa169218d6a5d',
823 'duration': 10643,
824 'upload_date': '20161111',
825 'uploader': 'Team PGP',
826 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
828 },
829 }, {
830 'info_dict': {
545cc85d 831 'id': '3AKt1R1aDnw',
cf7e015f 832 'ext': 'mp4',
545cc85d 833 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
834 'description': 'md5:e03b909557865076822aa169218d6a5d',
835 'duration': 10991,
836 'upload_date': '20161111',
837 'uploader': 'Team PGP',
838 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
839 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
840 },
841 }, {
842 'info_dict': {
545cc85d 843 'id': 'RtAMM00gpVc',
cf7e015f 844 'ext': 'mp4',
545cc85d 845 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
846 'description': 'md5:e03b909557865076822aa169218d6a5d',
847 'duration': 10995,
848 'upload_date': '20161111',
849 'uploader': 'Team PGP',
850 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
852 },
853 }, {
854 'info_dict': {
545cc85d 855 'id': '6N2fdlP3C5U',
cf7e015f 856 'ext': 'mp4',
545cc85d 857 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
858 'description': 'md5:e03b909557865076822aa169218d6a5d',
859 'duration': 10990,
860 'upload_date': '20161111',
861 'uploader': 'Team PGP',
862 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
864 },
865 }],
866 'params': {
867 'skip_download': True,
868 },
cbaed4bb 869 },
f9f49d87 870 {
067aa17e 871 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
872 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
873 'info_dict': {
874 'id': 'gVfLd0zydlo',
875 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
876 },
877 'playlist_count': 2,
be49068d 878 'skip': 'Not multifeed anymore',
f9f49d87 879 },
cbaed4bb 880 {
2d3d2997 881 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 882 'only_matching': True,
0e49d9a6 883 },
6d4fc66b 884 {
2d3d2997 885 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
886 'only_matching': True,
887 },
0e49d9a6 888 {
067aa17e 889 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 890 # Also tests cut-off URL expansion in video description (see
067aa17e
S
891 # https://github.com/ytdl-org/youtube-dl/issues/1892,
892 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
893 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
894 'info_dict': {
895 'id': 'lsguqyKfVQg',
896 'ext': 'mp4',
897 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 898 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 899 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 900 'duration': 133,
0e49d9a6
LL
901 'upload_date': '20151119',
902 'uploader_id': 'IronSoulElf',
ec85ded8 903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 904 'uploader': 'IronSoulElf',
eb6793ba
S
905 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
906 'track': 'Dark Walk - Position Music',
907 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 908 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
909 },
910 'params': {
911 'skip_download': True,
912 },
913 },
61f92af1 914 {
067aa17e 915 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
916 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
917 'only_matching': True,
918 },
313dfc45
LL
919 {
920 # Video with yt:stretch=17:0
921 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
922 'info_dict': {
923 'id': 'Q39EVAstoRM',
924 'ext': 'mp4',
925 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
926 'description': 'md5:ee18a25c350637c8faff806845bddee9',
927 'upload_date': '20151107',
928 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
929 'uploader': 'CH GAMER DROID',
930 },
931 'params': {
932 'skip_download': True,
933 },
be49068d 934 'skip': 'This video does not exist.',
313dfc45 935 },
7caf9830
S
936 {
937 # Video licensed under Creative Commons
938 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
939 'info_dict': {
940 'id': 'M4gD1WSo5mA',
941 'ext': 'mp4',
942 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
943 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 944 'duration': 721,
7caf9830
S
945 'upload_date': '20150127',
946 'uploader_id': 'BerkmanCenter',
ec85ded8 947 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 948 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
949 'license': 'Creative Commons Attribution license (reuse allowed)',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 },
fd050249
S
955 {
956 # Channel-like uploader_url
957 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
958 'info_dict': {
959 'id': 'eQcmzGIKrzg',
960 'ext': 'mp4',
961 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 962 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 963 'duration': 4060,
fd050249 964 'upload_date': '20151119',
eb6793ba 965 'uploader': 'Bernie Sanders',
fd050249 966 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
968 'license': 'Creative Commons Attribution license (reuse allowed)',
969 },
970 'params': {
971 'skip_download': True,
972 },
973 },
040ac686
S
974 {
975 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
976 'only_matching': True,
7f29cf54
S
977 },
978 {
067aa17e 979 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
980 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
981 'only_matching': True,
6496ccb4
S
982 },
983 {
984 # Rental video preview
985 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
986 'info_dict': {
987 'id': 'uGpuVWrhIzE',
988 'ext': 'mp4',
989 'title': 'Piku - Trailer',
990 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
991 'upload_date': '20150811',
992 'uploader': 'FlixMatrix',
993 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
995 'license': 'Standard YouTube License',
996 },
997 'params': {
998 'skip_download': True,
999 },
eb6793ba 1000 'skip': 'This video is not available.',
022a5d66 1001 },
12afdc2a
S
1002 {
1003 # YouTube Red video with episode data
1004 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1005 'info_dict': {
1006 'id': 'iqKdEhx-dD4',
1007 'ext': 'mp4',
1008 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1009 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1010 'duration': 2085,
12afdc2a
S
1011 'upload_date': '20170118',
1012 'uploader': 'Vsauce',
1013 'uploader_id': 'Vsauce',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1015 'series': 'Mind Field',
1016 'season_number': 1,
1017 'episode_number': 1,
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 'expected_warnings': [
1023 'Skipping DASH manifest',
1024 ],
1025 },
c7121fa7
S
1026 {
1027 # The following content has been identified by the YouTube community
1028 # as inappropriate or offensive to some audiences.
1029 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1030 'info_dict': {
1031 'id': '6SJNVb0GnPI',
1032 'ext': 'mp4',
1033 'title': 'Race Differences in Intelligence',
1034 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1035 'duration': 965,
1036 'upload_date': '20140124',
1037 'uploader': 'New Century Foundation',
1038 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1040 },
1041 'params': {
1042 'skip_download': True,
1043 },
545cc85d 1044 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1045 },
022a5d66
S
1046 {
1047 # itag 212
1048 'url': '1t24XAntNCY',
1049 'only_matching': True,
fd5c4aab
S
1050 },
1051 {
1052 # geo restricted to JP
1053 'url': 'sJL6WA-aGkQ',
1054 'only_matching': True,
1055 },
cd5a74a2
S
1056 {
1057 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1058 'only_matching': True,
1059 },
bc2ca1bb 1060 {
1061 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1062 'only_matching': True,
1063 },
1064 {
1065 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1066 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1067 'only_matching': True,
1068 },
825cd268
RA
1069 {
1070 # DRM protected
1071 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1072 'only_matching': True,
4fe54c12
S
1073 },
1074 {
1075 # Video with unsupported adaptive stream type formats
1076 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1077 'info_dict': {
1078 'id': 'Z4Vy8R84T1U',
1079 'ext': 'mp4',
1080 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1081 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1082 'duration': 433,
1083 'upload_date': '20130923',
1084 'uploader': 'Amelia Putri Harwita',
1085 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1087 'formats': 'maxcount:10',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 'youtube_include_dash_manifest': False,
1092 },
5429d6a9 1093 'skip': 'not actual anymore',
5caabd3c 1094 },
1095 {
822b9d9c 1096 # Youtube Music Auto-generated description
5caabd3c 1097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1098 'info_dict': {
1099 'id': 'MgNrAu2pzNs',
1100 'ext': 'mp4',
1101 'title': 'Voyeur Girl',
1102 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1103 'upload_date': '20190312',
5429d6a9
S
1104 'uploader': 'Stephen - Topic',
1105 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1106 'artist': 'Stephen',
1107 'track': 'Voyeur Girl',
1108 'album': 'it\'s too much love to know my dear',
1109 'release_date': '20190313',
1110 'release_year': 2019,
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
1115 },
66b48727
RA
1116 {
1117 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1118 'only_matching': True,
1119 },
011e75e6
S
1120 {
1121 # invalid -> valid video id redirection
1122 'url': 'DJztXj2GPfl',
1123 'info_dict': {
1124 'id': 'DJztXj2GPfk',
1125 'ext': 'mp4',
1126 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1127 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1128 'upload_date': '20090125',
1129 'uploader': 'Prochorowka',
1130 'uploader_id': 'Prochorowka',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1132 'artist': 'Panjabi MC',
1133 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1134 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1135 },
1136 'params': {
1137 'skip_download': True,
1138 },
545cc85d 1139 'skip': 'Video unavailable',
ea74e00b
DP
1140 },
1141 {
1142 # empty description results in an empty string
1143 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1144 'info_dict': {
1145 'id': 'x41yOUIvK2k',
1146 'ext': 'mp4',
1147 'title': 'IMG 3456',
1148 'description': '',
1149 'upload_date': '20170613',
1150 'uploader_id': 'ElevageOrVert',
1151 'uploader': 'ElevageOrVert',
1152 },
1153 'params': {
1154 'skip_download': True,
1155 },
1156 },
a0566bbf 1157 {
29f7c58a 1158 # with '};' inside yt initial data (see [1])
1159 # see [2] for an example with '};' inside ytInitialPlayerResponse
1160 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1161 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1162 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1163 'info_dict': {
1164 'id': 'CHqg6qOn4no',
1165 'ext': 'mp4',
1166 'title': 'Part 77 Sort a list of simple types in c#',
1167 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1168 'upload_date': '20130831',
1169 'uploader_id': 'kudvenkat',
1170 'uploader': 'kudvenkat',
1171 },
1172 'params': {
1173 'skip_download': True,
1174 },
1175 },
29f7c58a 1176 {
1177 # another example of '};' in ytInitialData
1178 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1179 'only_matching': True,
1180 },
1181 {
1182 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1183 'only_matching': True,
1184 },
545cc85d 1185 {
cc2db878 1186 # https://github.com/ytdl-org/youtube-dl/pull/28094
1187 'url': 'OtqTfy26tG0',
1188 'info_dict': {
1189 'id': 'OtqTfy26tG0',
1190 'ext': 'mp4',
1191 'title': 'Burn Out',
1192 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1193 'upload_date': '20141120',
1194 'uploader': 'The Cinematic Orchestra - Topic',
1195 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1196 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1197 'artist': 'The Cinematic Orchestra',
1198 'track': 'Burn Out',
1199 'album': 'Every Day',
1200 'release_data': None,
1201 'release_year': None,
1202 },
1203 'params': {
1204 'skip_download': True,
1205 },
545cc85d 1206 },
bc2ca1bb 1207 {
1208 # controversial video, only works with bpctr when authenticated with cookies
1209 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1210 'only_matching': True,
1211 },
2eb88d95
PH
1212 ]
1213
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Downloaded player code, keyed by player id
    self._code_cache = dict()
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = dict()
e0df6211 1218
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of example_sig"""
    part_lengths = [
        compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1222
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in player_url.

    The patterns in cls._PLAYER_INFO_RE are tried in order and the first
    one that matches supplies the 'id' group. ExtractorError is raised
    when none of them matches.
    """
    # Generator: later patterns are only tried if earlier ones fail
    candidates = (
        re.search(player_re, player_url)
        for player_re in cls._PLAYER_INFO_RE)
    id_m = next((found for found in candidates if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c
S
1232
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like example_sig.

    The function is looked up in the 'youtube-sigfuncs' cache first. On a
    miss the player code is downloaded (once per player id), the signature
    routine is extracted from it, and its index permutation is stored in
    the cache for later runs.

    video_id is only passed along to the download helper.
    Raises ExtractorError if the player cannot be identified or if the
    computed cache key is not a plain file name.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The id is handed to the cache as a key, so it must not carry any path
    # component. This is an explicit raise rather than an assert because
    # asserts are stripped when Python runs with -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError(
            'Invalid signature function id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec lists, per output position, the input index
        return lambda s: ''.join(s[i] for i in cache_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    code = self._code_cache[player_id]
    res = self._parse_sig_js(code)

    # Feed a string whose i-th character has code point i, so the output
    # reveals which input index ends up at each output position
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1259
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is run on a probe string whose i-th character has code point i,
    which reveals the input index behind every output character. That
    index list is rendered as a sum of indexing and slicing expressions on
    ``s`` and written out with to_screen, guarded by a check on the part
    lengths of example_sig.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions covering idxs, merging runs that
        # advance by +1 or -1 into a single slice
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to render for an empty index list
            return

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Use idxs[-1] rather than the loop variable: the loop body never
        # runs for a single-element list, which would leave it unbound
        last = idxs[-1]
        if step is None:
            yield 's[%d]' % last
        else:
            yield _genslice(start, last, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1298
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in jscode.

    The name of the JS function is located with the regexes below (tried
    in the listed order), the function is extracted with JSInterpreter,
    and the returned callable invokes it with the signature string as its
    single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return js_function([s])

    return run_signature_function
1322
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    player_url may be protocol-relative or relative to www.youtube.com; it
    is made absolute first. The signature function is memoised in
    self._player_cache. Any failure after URL normalisation is re-raised
    as an ExtractorError that embeds the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    is_absolute = re.match(r'https?://', player_url)
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not is_absolute:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1349
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in player_response.

    Does nothing when the response carries no usable
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request itself is
    non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    watched_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        watched_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1374
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in the HTML of webpage.

    The result is a list holding, in this order: HTML-unescaped player
    URLs from iframe/embed/object/SWFObject style embeds, HTML-unescaped
    values of lazyYT ``data-youtube-id`` attributes, and the
    ``data-video_id`` values of "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    # NB: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' and therefore never matched `embedSWF("...")`
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1406
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1411
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match.
    """
    mobj = re.compile(cls._VALID_URL, re.VERBOSE).match(url)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1419
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar found in data.

    Each chapter is a dict with 'start_time', 'end_time' and 'title'. A
    chapter ends where the next one starts; the last one ends at duration.
    Chapters whose start or end time cannot be determined are left out.
    Returns None when data contains no chapter list.
    """
    def start_seconds(entry):
        # timeRangeStartMillis is in milliseconds, hence scale=1000
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return None

    total = len(raw_chapters)
    result = []
    for index, entry in enumerate(raw_chapters):
        begin = start_seconds(entry)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1459
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for the JSON matched by regex and return it parsed.

    The regex is tried first with self._YT_INITIAL_BOUNDARY_RE appended
    and then on its own. '{}' is parsed when nothing matches, and JSON
    parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_string = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_string, video_id, fatal=False)
84213ea8 1464
c5e8d7af 1465 def _real_extract(self, url):
cf7e015f 1466 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1467 video_id = self._match_id(url)
1468 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1469 webpage_url = base_url + 'watch?v=' + video_id
1470 webpage = self._download_webpage(
1471 webpage_url + '&has_verified=1&bpctr=9999999999',
1472 video_id, fatal=False)
545cc85d 1473
1474 player_response = None
1475 if webpage:
1476 player_response = self._extract_yt_initial_variable(
1477 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1478 video_id, 'initial player response')
1479 if not player_response:
1480 player_response = self._call_api(
1481 'player', {'videoId': video_id}, video_id)
1482
1483 playability_status = player_response.get('playabilityStatus') or {}
1484 if playability_status.get('reason') == 'Sign in to confirm your age':
1485 pr = self._parse_json(try_get(compat_parse_qs(
1486 self._download_webpage(
1487 base_url + 'get_video_info', video_id,
1488 'Refetching age-gated info webpage',
1489 'unable to download video info webpage', query={
1490 'video_id': video_id,
7c60c33e 1491 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1492 }, fatal=False)),
1493 lambda x: x['player_response'][0],
1494 compat_str) or '{}', video_id)
1495 if pr:
1496 player_response = pr
1497
1498 trailer_video_id = try_get(
1499 playability_status,
1500 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1501 compat_str)
1502 if trailer_video_id:
1503 return self.url_result(
1504 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1505
545cc85d 1506 def get_text(x):
1507 if not x:
c2d125d9 1508 return
545cc85d 1509 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1510
545cc85d 1511 search_meta = (
1512 lambda x: self._html_search_meta(x, webpage, default=None)) \
1513 if webpage else lambda x: None
dbdaaa23 1514
545cc85d 1515 video_details = player_response.get('videoDetails') or {}
37357d21 1516 microformat = try_get(
545cc85d 1517 player_response,
1518 lambda x: x['microformat']['playerMicroformatRenderer'],
1519 dict) or {}
1520 video_title = video_details.get('title') \
1521 or get_text(microformat.get('title')) \
1522 or search_meta(['og:title', 'twitter:title', 'title'])
1523 video_description = video_details.get('shortDescription')
cf7e015f 1524
8fe10494 1525 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1526 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1527 multifeed_metadata_list = try_get(
1528 player_response,
1529 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1530 compat_str)
8fe10494
S
1531 if multifeed_metadata_list:
1532 entries = []
1533 feed_ids = []
1534 for feed in multifeed_metadata_list.split(','):
1535 # Unquote should take place before split on comma (,) since textual
1536 # fields may contain comma as well (see
067aa17e 1537 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1538 feed_data = compat_parse_qs(
1539 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1540
def feed_entry(name):
    """Look up the first value of field *name* in the current feed's parsed query data.

    The lookup and the string type check are delegated to try_get.
    """
    def first_value(fields):
        return fields[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
1544
1545 feed_id = feed_entry('id')
1546 if not feed_id:
1547 continue
1548 feed_title = feed_entry('title')
1549 title = video_title
1550 if feed_title:
1551 title += ' (%s)' % feed_title
8fe10494
S
1552 entries.append({
1553 '_type': 'url_transparent',
1554 'ie_key': 'Youtube',
1555 'url': smuggle_url(
545cc85d 1556 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1557 {'force_singlefeed': True}),
6b09401b 1558 'title': title,
8fe10494 1559 })
6b09401b 1560 feed_ids.append(feed_id)
8fe10494
S
1561 self.to_screen(
1562 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1563 % (', '.join(feed_ids), video_id))
545cc85d 1564 return self.playlist_result(
1565 entries, video_id, video_title, video_description)
8fe10494
S
1566 else:
1567 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1568
545cc85d 1569 formats = []
1570 itags = []
cc2db878 1571 itag_qualities = {}
545cc85d 1572 player_url = None
dca3ff4a 1573 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1574 streaming_data = player_response.get('streamingData') or {}
1575 streaming_formats = streaming_data.get('formats') or []
1576 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1577 for fmt in streaming_formats:
1578 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1579 continue
321bf820 1580
cc2db878 1581 itag = str_or_none(fmt.get('itag'))
1582 quality = fmt.get('quality')
1583 if itag and quality:
1584 itag_qualities[itag] = quality
1585 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1586 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1587 # number of fragments that would subsequently be requested (with `&sq=N`)
1588 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1589 continue
1590
545cc85d 1591 fmt_url = fmt.get('url')
1592 if not fmt_url:
1593 sc = compat_parse_qs(fmt.get('signatureCipher'))
1594 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1595 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1596 if not (sc and fmt_url and encrypted_sig):
1597 continue
1598 if not player_url:
1599 if not webpage:
1600 continue
1601 player_url = self._search_regex(
1602 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1603 webpage, 'player URL', fatal=False)
1604 if not player_url:
201e9eaa 1605 continue
545cc85d 1606 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1607 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1608 fmt_url += '&' + sp + '=' + signature
1609
545cc85d 1610 if itag:
1611 itags.append(itag)
cc2db878 1612 tbr = float_or_none(
1613 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1614 dct = {
1615 'asr': int_or_none(fmt.get('audioSampleRate')),
1616 'filesize': int_or_none(fmt.get('contentLength')),
1617 'format_id': itag,
1618 'format_note': fmt.get('qualityLabel') or quality,
1619 'fps': int_or_none(fmt.get('fps')),
1620 'height': int_or_none(fmt.get('height')),
dca3ff4a 1621 'quality': q(quality),
cc2db878 1622 'tbr': tbr,
545cc85d 1623 'url': fmt_url,
1624 'width': fmt.get('width'),
1625 }
1626 mimetype = fmt.get('mimeType')
1627 if mimetype:
1628 mobj = re.match(
1629 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1630 if mobj:
1631 dct['ext'] = mimetype2ext(mobj.group(1))
1632 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1633 no_audio = dct.get('acodec') == 'none'
1634 no_video = dct.get('vcodec') == 'none'
1635 if no_audio:
1636 dct['vbr'] = tbr
1637 if no_video:
1638 dct['abr'] = tbr
1639 if no_audio or no_video:
545cc85d 1640 dct['downloader_options'] = {
1641 # Youtube throttles chunks >~10M
1642 'http_chunk_size': 10485760,
bf1317d2 1643 }
7c60c33e 1644 if dct.get('ext'):
1645 dct['container'] = dct['ext'] + '_dash'
545cc85d 1646 formats.append(dct)
1647
1648 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1649 if hls_manifest_url:
1650 for f in self._extract_m3u8_formats(
1651 hls_manifest_url, video_id, 'mp4', fatal=False):
1652 itag = self._search_regex(
1653 r'/itag/(\d+)', f['url'], 'itag', default=None)
1654 if itag:
1655 f['format_id'] = itag
1656 formats.append(f)
1657
1658 if self._downloader.params.get('youtube_include_dash_manifest'):
1659 dash_manifest_url = streaming_data.get('dashManifestUrl')
1660 if dash_manifest_url:
545cc85d 1661 for f in self._extract_mpd_formats(
1662 dash_manifest_url, video_id, fatal=False):
cc2db878 1663 itag = f['format_id']
1664 if itag in itags:
1665 continue
dca3ff4a 1666 if itag in itag_qualities:
1667 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1668 # but kept to maintain feature parity (and code similarity) with youtube-dl
1669 # Remove if this causes any issues with sorting in future
1670 f['quality'] = q(itag_qualities[itag])
545cc85d 1671 filesize = int_or_none(self._search_regex(
1672 r'/clen/(\d+)', f.get('fragment_base_url')
1673 or f['url'], 'file size', default=None))
1674 if filesize:
1675 f['filesize'] = filesize
cc2db878 1676 formats.append(f)
bf1317d2 1677
545cc85d 1678 if not formats:
63ad4d43 1679 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1680 raise ExtractorError(
1681 'This video is DRM protected.', expected=True)
1682 pemr = try_get(
1683 playability_status,
1684 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1685 dict) or {}
1686 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1687 subreason = pemr.get('subreason')
1688 if subreason:
1689 subreason = clean_html(get_text(subreason))
1690 if subreason == 'The uploader has not made this video available in your country.':
1691 countries = microformat.get('availableCountries')
1692 if not countries:
1693 regions_allowed = search_meta('regionsAllowed')
1694 countries = regions_allowed.split(',') if regions_allowed else None
1695 self.raise_geo_restricted(
1696 subreason, countries)
1697 reason += '\n' + subreason
1698 if reason:
1699 raise ExtractorError(reason, expected=True)
bf1317d2 1700
545cc85d 1701 self._sort_formats(formats)
bf1317d2 1702
545cc85d 1703 keywords = video_details.get('keywords') or []
1704 if not keywords and webpage:
1705 keywords = [
1706 unescapeHTML(m.group('content'))
1707 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1708 for keyword in keywords:
1709 if keyword.startswith('yt:stretch='):
1710 w, h = keyword.split('=')[1].split(':')
1711 w, h = int(w), int(h)
1712 if w > 0 and h > 0:
1713 ratio = w / h
1714 for f in formats:
1715 if f.get('vcodec') != 'none':
1716 f['stretched_ratio'] = ratio
6449cd80 1717
545cc85d 1718 thumbnails = []
1719 for container in (video_details, microformat):
1720 for thumbnail in (try_get(
1721 container,
1722 lambda x: x['thumbnail']['thumbnails'], list) or []):
1723 thumbnail_url = thumbnail.get('url')
1724 if not thumbnail_url:
bf1317d2 1725 continue
545cc85d 1726 thumbnails.append({
1727 'height': int_or_none(thumbnail.get('height')),
1728 'url': thumbnail_url,
1729 'width': int_or_none(thumbnail.get('width')),
1730 })
1731 if thumbnails:
1732 break
a6211d23 1733 else:
545cc85d 1734 thumbnail = search_meta(['og:image', 'twitter:image'])
1735 if thumbnail:
1736 thumbnails = [{'url': thumbnail}]
1737
1738 category = microformat.get('category') or search_meta('genre')
1739 channel_id = video_details.get('channelId') \
1740 or microformat.get('externalChannelId') \
1741 or search_meta('channelId')
1742 duration = int_or_none(
1743 video_details.get('lengthSeconds')
1744 or microformat.get('lengthSeconds')) \
1745 or parse_duration(search_meta('duration'))
1746 is_live = video_details.get('isLive')
1747 owner_profile_url = microformat.get('ownerProfileUrl')
1748
1749 info = {
1750 'id': video_id,
1751 'title': self._live_title(video_title) if is_live else video_title,
1752 'formats': formats,
1753 'thumbnails': thumbnails,
1754 'description': video_description,
1755 'upload_date': unified_strdate(
1756 microformat.get('uploadDate')
1757 or search_meta('uploadDate')),
1758 'uploader': video_details['author'],
1759 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1760 'uploader_url': owner_profile_url,
1761 'channel_id': channel_id,
1762 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1763 'duration': duration,
1764 'view_count': int_or_none(
1765 video_details.get('viewCount')
1766 or microformat.get('viewCount')
1767 or search_meta('interactionCount')),
1768 'average_rating': float_or_none(video_details.get('averageRating')),
1769 'age_limit': 18 if (
1770 microformat.get('isFamilySafe') is False
1771 or search_meta('isFamilyFriendly') == 'false'
1772 or search_meta('og:restrictions:age') == '18+') else 0,
1773 'webpage_url': webpage_url,
1774 'categories': [category] if category else None,
1775 'tags': keywords,
1776 'is_live': is_live,
1777 'playable_in_embed': playability_status.get('playableInEmbed'),
f76ede8e 1778 'was_live': video_details.get('isLiveContent')
545cc85d 1779 }
b477fc13 1780
545cc85d 1781 pctr = try_get(
1782 player_response,
1783 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1784 subtitles = {}
1785 if pctr:
def process_language(container, base_url, lang_code, query):
    """Register one subtitle entry per supported format for a language.

    container: dict mapping language code -> list of subtitle dicts;
        updated in place.
    base_url: caption track URL the per-format query is merged into.
    lang_code: key under which the entries are stored in container.
    query: extra query parameters sent with every format; left unmodified.
    """
    lang_subs = []
    for fmt in self._SUBTITLE_FORMATS:
        # Build a fresh query per format instead of writing 'fmt' into the
        # caller's dict, which leaked the last format back to the caller.
        fmt_query = dict(query)
        fmt_query['fmt'] = fmt
        lang_subs.append({
            'ext': fmt,
            'url': update_url_query(base_url, fmt_query),
        })
    container[lang_code] = lang_subs
7e72694b 1797
545cc85d 1798 for caption_track in (pctr.get('captionTracks') or []):
1799 base_url = caption_track.get('baseUrl')
1800 if not base_url:
1801 continue
1802 if caption_track.get('kind') != 'asr':
1803 lang_code = caption_track.get('languageCode')
1804 if not lang_code:
1805 continue
1806 process_language(
1807 subtitles, base_url, lang_code, {})
1808 continue
1809 automatic_captions = {}
1810 for translation_language in (pctr.get('translationLanguages') or []):
1811 translation_language_code = translation_language.get('languageCode')
1812 if not translation_language_code:
1813 continue
1814 process_language(
1815 automatic_captions, base_url, translation_language_code,
1816 {'tlang': translation_language_code})
1817 info['automatic_captions'] = automatic_captions
1818 info['subtitles'] = subtitles
7e72694b 1819
545cc85d 1820 parsed_url = compat_urllib_parse_urlparse(url)
1821 for component in [parsed_url.fragment, parsed_url.query]:
1822 query = compat_parse_qs(component)
1823 for k, v in query.items():
1824 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1825 d_k += '_time'
1826 if d_k not in info and k in s_ks:
1827 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1828
1829 # Youtube Music Auto-generated description
822b9d9c 1830 if video_description:
38d70284 1831 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1832 if mobj:
822b9d9c
RA
1833 release_year = mobj.group('release_year')
1834 release_date = mobj.group('release_date')
1835 if release_date:
1836 release_date = release_date.replace('-', '')
1837 if not release_year:
545cc85d 1838 release_year = release_date[:4]
1839 info.update({
1840 'album': mobj.group('album'.strip()),
1841 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1842 'track': mobj.group('track').strip(),
1843 'release_date': release_date,
cc2db878 1844 'release_year': int_or_none(release_year),
545cc85d 1845 })
7e72694b 1846
545cc85d 1847 initial_data = None
1848 if webpage:
1849 initial_data = self._extract_yt_initial_variable(
1850 webpage, self._YT_INITIAL_DATA_RE, video_id,
1851 'yt initial data')
1852 if not initial_data:
1853 initial_data = self._call_api(
1854 'next', {'videoId': video_id}, video_id, fatal=False)
1855
1856 if not is_live:
1857 try:
1858 # This will error if there is no livechat
1859 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1860 info['subtitles']['live_chat'] = [{
1861 'video_id': video_id,
1862 'ext': 'json',
1863 'protocol': 'youtube_live_chat_replay',
1864 }]
1865 except (KeyError, IndexError, TypeError):
1866 pass
1867
1868 if initial_data:
1869 chapters = self._extract_chapters_from_json(
1870 initial_data, video_id, duration)
1871 if not chapters:
1872 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1873 contents = try_get(
1874 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1875 list)
1876 if not contents:
1877 continue
1878
def chapter_time(mmlir):
    """Return the start time of a macro-markers list item as parsed by parse_duration.

    mmlir: a 'macroMarkersListItemRenderer' dict, or None when the item
        carries no such renderer.
    """
    # The caller passes the result of try_get() for the *next* item, which
    # may be None; guard against it instead of raising AttributeError.
    return parse_duration(
        get_text((mmlir or {}).get('timeDescription')))
1882
1883 chapters = []
1884 for next_num, content in enumerate(contents, start=1):
1885 mmlir = content.get('macroMarkersListItemRenderer') or {}
1886 start_time = chapter_time(mmlir)
1887 end_time = chapter_time(try_get(
1888 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1889 if next_num < len(contents) else duration
1890 if start_time is None or end_time is None:
1891 continue
1892 chapters.append({
1893 'start_time': start_time,
1894 'end_time': end_time,
1895 'title': get_text(mmlir.get('title')),
1896 })
1897 if chapters:
1898 break
1899 if chapters:
1900 info['chapters'] = chapters
1901
1902 contents = try_get(
1903 initial_data,
1904 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1905 list) or []
1906 for content in contents:
1907 vpir = content.get('videoPrimaryInfoRenderer')
1908 if vpir:
1909 stl = vpir.get('superTitleLink')
1910 if stl:
1911 stl = get_text(stl)
1912 if try_get(
1913 vpir,
1914 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1915 info['location'] = stl
1916 else:
1917 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1918 if mobj:
1919 info.update({
1920 'series': mobj.group(1),
1921 'season_number': int(mobj.group(2)),
1922 'episode_number': int(mobj.group(3)),
1923 })
1924 for tlb in (try_get(
1925 vpir,
1926 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1927 list) or []):
1928 tbr = tlb.get('toggleButtonRenderer') or {}
1929 for getter, regex in [(
1930 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1931 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1932 lambda x: x['accessibility'],
1933 lambda x: x['accessibilityData']['accessibilityData'],
1934 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1935 label = (try_get(tbr, getter, dict) or {}).get('label')
1936 if label:
1937 mobj = re.match(regex, label)
1938 if mobj:
1939 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1940 break
1941 sbr_tooltip = try_get(
1942 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1943 if sbr_tooltip:
1944 like_count, dislike_count = sbr_tooltip.split(' / ')
1945 info.update({
1946 'like_count': str_to_int(like_count),
1947 'dislike_count': str_to_int(dislike_count),
1948 })
1949 vsir = content.get('videoSecondaryInfoRenderer')
1950 if vsir:
1951 info['channel'] = get_text(try_get(
1952 vsir,
1953 lambda x: x['owner']['videoOwnerRenderer']['title'],
1954 compat_str))
1955 rows = try_get(
1956 vsir,
1957 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1958 list) or []
1959 multiple_songs = False
1960 for row in rows:
1961 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1962 multiple_songs = True
1963 break
1964 for row in rows:
1965 mrr = row.get('metadataRowRenderer') or {}
1966 mrr_title = mrr.get('title')
1967 if not mrr_title:
1968 continue
1969 mrr_title = get_text(mrr['title'])
1970 mrr_contents_text = get_text(mrr['contents'][0])
1971 if mrr_title == 'License':
1972 info['license'] = mrr_contents_text
1973 elif not multiple_songs:
1974 if mrr_title == 'Album':
1975 info['album'] = mrr_contents_text
1976 elif mrr_title == 'Artist':
1977 info['artist'] = mrr_contents_text
1978 elif mrr_title == 'Song':
1979 info['track'] = mrr_contents_text
1980
1981 fallbacks = {
1982 'channel': 'uploader',
1983 'channel_id': 'uploader_id',
1984 'channel_url': 'uploader_url',
1985 }
1986 for to, frm in fallbacks.items():
1987 if not info.get(to):
1988 info[to] = info.get(frm)
1989
1990 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1991 v = info.get(s_k)
1992 if v:
1993 info[d_k] = v
b84071c0 1994
06167fbb 1995 # get xsrf for annotations or comments
1996 get_annotations = self._downloader.params.get('writeannotations', False)
1997 get_comments = self._downloader.params.get('getcomments', False)
1998 if get_annotations or get_comments:
29f7c58a 1999 xsrf_token = None
545cc85d 2000 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2001 if ytcfg:
2002 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2003 if not xsrf_token:
2004 xsrf_token = self._search_regex(
2005 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2006 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2007
2008 # annotations
06167fbb 2009 if get_annotations:
64b6a4e9
RA
2010 invideo_url = try_get(
2011 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2012 if xsrf_token and invideo_url:
29f7c58a 2013 xsrf_field_name = None
2014 if ytcfg:
2015 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2016 if not xsrf_field_name:
2017 xsrf_field_name = self._search_regex(
2018 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2019 webpage, 'xsrf field name',
29f7c58a 2020 group='xsrf_field_name', default='session_token')
8a784c74 2021 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2022 self._proto_relative_url(invideo_url),
2023 video_id, note='Downloading annotations',
2024 errnote='Unable to download video annotations', fatal=False,
2025 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2026
06167fbb 2027 # Get comments
2028 # TODO: Refactor and move to a separate function
277d6ff5 2029 def extract_comments():
06167fbb 2030 expected_video_comment_count = 0
2031 video_comments = []
277d6ff5 2032 comment_xsrf = xsrf_token
06167fbb 2033
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text that follows *key* in *html*, up to *separator*.

    The value starts num_chars characters after the end of the first
    occurrence of key and ends just before the next separator.

    Returns '' when key does not occur in html. When no separator follows
    the value, the remainder of html is returned.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find() returned -1; offsets computed from it would slice an
        # unrelated part of html.
        return ''
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # Unterminated value: a -1 end index would drop the last character.
        return html[pos_begin:]
    return html[pos_begin:pos_end]
2038
def search_dict(partial, key):
    """Lazily yield every value stored under *key* in a nested dict/list structure.

    Dicts and lists are walked depth-first in iteration order. A value found
    under key is yielded as-is and not searched further; anything that is
    neither a dict nor a list yields nothing.
    """
    if isinstance(partial, dict):
        for name in partial:
            value = partial[name]
            if name == key:
                yield value
                continue
            for found in search_dict(value, key):
                yield found
        return
    if isinstance(partial, list):
        for element in partial:
            for found in search_dict(element, key):
                yield found
2051
8a784c74 2052 continuations = []
2053 if initial_data:
2054 try:
2055 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2056 continuations = [ncd['continuation']]
2057 # Handle videos where comments have been disabled entirely
2058 except StopIteration:
2059 pass
06167fbb 2060
def get_continuation(continuation, session_token, replies=False):
    """Request one page of comments (or comment replies) for a continuation token.

    continuation: token sent as the 'ctoken' query parameter.
    session_token: value posted as 'session_token' in the request body.
    replies: when true, 'action_get_comment_replies' is requested instead
        of 'action_get_comments'.

    Returns the parsed JSON body on HTTP 200 and None on HTTP 413; raises
    ExtractorError for any other status code that reaches this point.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    query = {
        'pbj': 1,
        'ctoken': continuation,
    }
    query[action] = 1

    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01'
    }
    # 413 is declared as an expected status so it can be inspected below.
    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=urlencode_postdata({'session_token': session_token}),
        query=query,
        headers=request_headers)

    status = handle.getcode()
    if status == 413:
        return None
    if status != 200:
        raise ExtractorError('Unexpected HTTP error code: %s' % status)
    return self._parse_json(content, video_id)
2095
2096 first_continuation = True
885d36d4 2097 chain_msg = ''
2098 self.to_screen('Downloading comments')
06167fbb 2099 while continuations:
885d36d4 2100 continuation = continuations.pop()
277d6ff5 2101 comment_response = get_continuation(continuation, comment_xsrf)
06167fbb 2102 if not comment_response:
2103 continue
2104 if list(search_dict(comment_response, 'externalErrorMessage')):
2105 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2106
8d0ea5f9
B
2107 if 'continuationContents' not in comment_response['response']:
2108 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2109 continue
2110 # not sure if this actually helps
2111 if 'xsrf_token' in comment_response:
277d6ff5 2112 comment_xsrf = comment_response['xsrf_token']
8d0ea5f9 2113
06167fbb 2114 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2115 if first_continuation:
2116 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2117 first_continuation = False
2118 if 'contents' not in item_section:
2119 # continuation returned no comments?
2120 # set an empty array as to not break the for loop
2121 item_section['contents'] = []
2122
2123 for meta_comment in item_section['contents']:
2124 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2125 video_comments.append({
2126 'id': comment['commentId'],
ba7bf12d 2127 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2128 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2129 'author': comment.get('authorText', {}).get('simpleText', ''),
2130 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2131 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2132 'parent': 'root'
2133 })
2134 if 'replies' not in meta_comment['commentThreadRenderer']:
2135 continue
2136
8d0ea5f9
B
2137 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2138 while reply_continuations:
06167fbb 2139 time.sleep(1)
8d0ea5f9 2140 continuation = reply_continuations.pop()
277d6ff5 2141 replies_data = get_continuation(continuation, comment_xsrf, True)
06167fbb 2142 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2143 continue
06167fbb 2144
2145 if self._downloader.params.get('verbose', False):
885d36d4 2146 chain_msg = ' (chain %s)' % comment['commentId']
2147 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2148 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2149 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2150 reply_comment = reply_meta['commentRenderer']
2151 video_comments.append({
2152 'id': reply_comment['commentId'],
2153 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2154 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2155 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2156 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2157 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2158 'parent': comment['commentId']
2159 })
2160 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2161 continue
8d0ea5f9 2162 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2163
885d36d4 2164 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2165 if 'continuations' in item_section:
8d0ea5f9 2166 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2167 time.sleep(1)
2168
885d36d4 2169 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
277d6ff5 2170 return {
545cc85d 2171 'comments': video_comments,
2172 'comment_count': expected_video_comment_count
277d6ff5 2173 }
2174
2175 if get_comments:
2176 info['__post_extractor'] = extract_comments
4ea3be0a 2177
545cc85d 2178 self.mark_watched(video_id, player_response)
d77ab8e2 2179
545cc85d 2180 return info
c5e8d7af 2181
5f6a1245 2182
8bdd16b4 2183class YoutubeTabIE(YoutubeBaseInfoExtractor):
2184 IE_DESC = 'YouTube.com tab'
70d5c17b 2185 _VALID_URL = r'''(?x)
2186 https?://
2187 (?:\w+\.)?
2188 (?:
2189 youtube(?:kids)?\.com|
2190 invidio\.us
2191 )/
2192 (?:
2193 (?:channel|c|user)/|
2194 (?P<not_channel>
9ba5705a 2195 feed/|hashtag/|
70d5c17b 2196 (?:playlist|watch)\?.*?\blist=
2197 )|
29f7c58a 2198 (?!(?:%s)\b) # Direct URLs
70d5c17b 2199 )
2200 (?P<id>[^/?\#&]+)
2201 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2202 IE_NAME = 'youtube:tab'
2203
81127aa5 2204 _TESTS = [{
8bdd16b4 2205 # playlists, multipage
2206 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2207 'playlist_mincount': 94,
2208 'info_dict': {
2209 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2210 'title': 'Игорь Клейнер - Playlists',
2211 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2212 'uploader': 'Игорь Клейнер',
2213 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2214 },
2215 }, {
2216 # playlists, multipage, different order
2217 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2218 'playlist_mincount': 94,
2219 'info_dict': {
2220 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2221 'title': 'Игорь Клейнер - Playlists',
2222 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2223 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2224 'uploader': 'Игорь Клейнер',
8bdd16b4 2225 },
2226 }, {
2227 # playlists, singlepage
2228 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2229 'playlist_mincount': 4,
2230 'info_dict': {
2231 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2232 'title': 'ThirstForScience - Playlists',
2233 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2234 'uploader': 'ThirstForScience',
2235 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2236 }
2237 }, {
2238 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2239 'only_matching': True,
2240 }, {
2241 # basic, single video playlist
0e30a7b9 2242 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2243 'info_dict': {
0e30a7b9 2244 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2245 'uploader': 'Sergey M.',
2246 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2247 'title': 'youtube-dl public playlist',
81127aa5 2248 },
0e30a7b9 2249 'playlist_count': 1,
9291475f 2250 }, {
8bdd16b4 2251 # empty playlist
0e30a7b9 2252 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2253 'info_dict': {
0e30a7b9 2254 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2255 'uploader': 'Sergey M.',
2256 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2257 'title': 'youtube-dl empty playlist',
9291475f
PH
2258 },
2259 'playlist_count': 0,
2260 }, {
8bdd16b4 2261 # Home tab
2262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2263 'info_dict': {
8bdd16b4 2264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2265 'title': 'lex will - Home',
2266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2267 'uploader': 'lex will',
2268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2269 },
8bdd16b4 2270 'playlist_mincount': 2,
9291475f 2271 }, {
8bdd16b4 2272 # Videos tab
2273 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2274 'info_dict': {
8bdd16b4 2275 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2276 'title': 'lex will - Videos',
2277 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2278 'uploader': 'lex will',
2279 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2280 },
8bdd16b4 2281 'playlist_mincount': 975,
9291475f 2282 }, {
8bdd16b4 2283 # Videos tab, sorted by popular
2284 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2285 'info_dict': {
8bdd16b4 2286 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2287 'title': 'lex will - Videos',
2288 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2289 'uploader': 'lex will',
2290 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2291 },
8bdd16b4 2292 'playlist_mincount': 199,
9291475f 2293 }, {
8bdd16b4 2294 # Playlists tab
2295 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2296 'info_dict': {
8bdd16b4 2297 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2298 'title': 'lex will - Playlists',
2299 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2300 'uploader': 'lex will',
2301 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2302 },
8bdd16b4 2303 'playlist_mincount': 17,
ac7553d0 2304 }, {
8bdd16b4 2305 # Community tab
2306 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2307 'info_dict': {
8bdd16b4 2308 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2309 'title': 'lex will - Community',
2310 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2311 'uploader': 'lex will',
2312 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2313 },
2314 'playlist_mincount': 18,
87dadd45 2315 }, {
8bdd16b4 2316 # Channels tab
2317 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2318 'info_dict': {
8bdd16b4 2319 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2320 'title': 'lex will - Channels',
2321 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2322 'uploader': 'lex will',
2323 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2324 },
deaec5af 2325 'playlist_mincount': 12,
6b08cdf6 2326 }, {
a0566bbf 2327 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2328 'only_matching': True,
2329 }, {
a0566bbf 2330 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2331 'only_matching': True,
2332 }, {
a0566bbf 2333 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2334 'only_matching': True,
2335 }, {
2336 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2337 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2338 'info_dict': {
2339 'title': '29C3: Not my department',
2340 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2341 'uploader': 'Christiaan008',
2342 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2343 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2344 },
2345 'playlist_count': 96,
2346 }, {
2347 'note': 'Large playlist',
2348 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2349 'info_dict': {
8bdd16b4 2350 'title': 'Uploads from Cauchemar',
2351 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2352 'uploader': 'Cauchemar',
2353 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2354 },
8bdd16b4 2355 'playlist_mincount': 1123,
2356 }, {
2357 # even larger playlist, 8832 videos
2358 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2359 'only_matching': True,
4b7df0d3
JMF
2360 }, {
2361 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2362 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2363 'info_dict': {
acf757f4
PH
2364 'title': 'Uploads from Interstellar Movie',
2365 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2366 'uploader': 'Interstellar Movie',
8bdd16b4 2367 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2368 },
481cc733 2369 'playlist_mincount': 21,
8bdd16b4 2370 }, {
2371 # https://github.com/ytdl-org/youtube-dl/issues/21844
2372 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2373 'info_dict': {
2374 'title': 'Data Analysis with Dr Mike Pound',
2375 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2376 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2377 'uploader': 'Computerphile',
deaec5af 2378 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2379 },
2380 'playlist_mincount': 11,
2381 }, {
a0566bbf 2382 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2383 'only_matching': True,
dacb3a86
S
2384 }, {
2385 # Playlist URL that does not actually serve a playlist
2386 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2387 'info_dict': {
2388 'id': 'FqZTN594JQw',
2389 'ext': 'webm',
2390 'title': "Smiley's People 01 detective, Adventure Series, Action",
2391 'uploader': 'STREEM',
2392 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2394 'upload_date': '20150526',
2395 'license': 'Standard YouTube License',
2396 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2397 'categories': ['People & Blogs'],
2398 'tags': list,
dbdaaa23 2399 'view_count': int,
dacb3a86
S
2400 'like_count': int,
2401 'dislike_count': int,
2402 },
2403 'params': {
2404 'skip_download': True,
2405 },
13a75688 2406 'skip': 'This video is not available.',
dacb3a86 2407 'add_ie': [YoutubeIE.ie_key()],
481cc733 2408 }, {
8bdd16b4 2409 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2410 'only_matching': True,
66b48727 2411 }, {
8bdd16b4 2412 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2413 'only_matching': True,
a0566bbf 2414 }, {
2415 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2416 'info_dict': {
2417 'id': '9Auq9mYxFEE',
2418 'ext': 'mp4',
deaec5af 2419 'title': compat_str,
a0566bbf 2420 'uploader': 'Sky News',
2421 'uploader_id': 'skynews',
2422 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2423 'upload_date': '20191102',
deaec5af 2424 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2425 'categories': ['News & Politics'],
2426 'tags': list,
2427 'like_count': int,
2428 'dislike_count': int,
2429 },
2430 'params': {
2431 'skip_download': True,
2432 },
2433 }, {
2434 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2435 'info_dict': {
2436 'id': 'a48o2S1cPoo',
2437 'ext': 'mp4',
2438 'title': 'The Young Turks - Live Main Show',
2439 'uploader': 'The Young Turks',
2440 'uploader_id': 'TheYoungTurks',
2441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2442 'upload_date': '20150715',
2443 'license': 'Standard YouTube License',
2444 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2445 'categories': ['News & Politics'],
2446 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2447 'like_count': int,
2448 'dislike_count': int,
2449 },
2450 'params': {
2451 'skip_download': True,
2452 },
2453 'only_matching': True,
2454 }, {
2455 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2456 'only_matching': True,
2457 }, {
2458 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2459 'only_matching': True,
3d3dddc9 2460 }, {
2461 'url': 'https://www.youtube.com/feed/trending',
2462 'only_matching': True,
2463 }, {
2464 # needs auth
2465 'url': 'https://www.youtube.com/feed/library',
2466 'only_matching': True,
2467 }, {
2468 # needs auth
2469 'url': 'https://www.youtube.com/feed/history',
2470 'only_matching': True,
2471 }, {
2472 # needs auth
2473 'url': 'https://www.youtube.com/feed/subscriptions',
2474 'only_matching': True,
2475 }, {
2476 # needs auth
2477 'url': 'https://www.youtube.com/feed/watch_later',
2478 'only_matching': True,
2479 }, {
2480 # no longer available?
2481 'url': 'https://www.youtube.com/feed/recommended',
2482 'only_matching': True,
29f7c58a 2483 }, {
2484 # inline playlist with not always working continuations
2485 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2486 'only_matching': True,
2487 }, {
2488 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2489 'only_matching': True,
2490 }, {
2491 'url': 'https://www.youtube.com/course',
2492 'only_matching': True,
2493 }, {
2494 'url': 'https://www.youtube.com/zsecurity',
2495 'only_matching': True,
2496 }, {
2497 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2498 'only_matching': True,
2499 }, {
2500 'url': 'https://www.youtube.com/TheYoungTurks/live',
2501 'only_matching': True,
2502 }]
2503
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubeIE are always rejected here; everything else
    is decided by the parent class' own ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2508
2509 def _extract_channel_id(self, webpage):
2510 channel_id = self._html_search_meta(
2511 'channelId', webpage, 'channel id', default=None)
2512 if channel_id:
2513 return channel_id
2514 channel_url = self._html_search_meta(
2515 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2516 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2517 'twitter:app:url:googleplay'), webpage, 'channel url')
2518 return self._search_regex(
2519 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2520 channel_url, 'channel id')
15f6397c 2521
8bdd16b4 2522 @staticmethod
2523 def _extract_grid_item_renderer(item):
2524 for item_kind in ('Playlist', 'Video', 'Channel'):
2525 renderer = item.get('grid%sRenderer' % item_kind)
2526 if renderer:
2527 return renderer
2528
def _grid_entries(self, grid_renderer):
    """Yield one result per playlist, video or channel tile of a grid.

    Each element of ``grid_renderer['items']`` is unwrapped with
    ``_extract_grid_item_renderer``; non-dict elements and tiles are
    skipped. A tile may yield up to three results, depending on which of
    ``playlistId``, ``videoId`` and ``channelId`` it carries.
    """
    for grid_item in grid_renderer['items']:
        tile = (
            self._extract_grid_item_renderer(grid_item)
            if isinstance(grid_item, dict) else None)
        if not isinstance(tile, dict):
            continue
        runs_title = try_get(
            tile, lambda x: x['title']['runs'][0]['text'], compat_str)

        # Playlist tile: delegate to the tab extractor
        list_id = tile.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=runs_title)

        # Video tile
        if tile.get('videoId'):
            yield self._extract_video(tile)

        # Channel tile: its title lives under 'simpleText' instead of 'runs'
        owner_id = tile.get('channelId')
        if owner_id:
            simple_title = try_get(
                tile, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2557
3d3dddc9 2558 def _shelf_entries_from_content(self, shelf_renderer):
2559 content = shelf_renderer.get('content')
2560 if not isinstance(content, dict):
8bdd16b4 2561 return
3d3dddc9 2562 renderer = content.get('gridRenderer')
2563 if renderer:
2564 # TODO: add support for nested playlists so each shelf is processed
2565 # as separate playlist
2566 # TODO: this includes only first N items
2567 for entry in self._grid_entries(renderer):
2568 yield entry
2569 renderer = content.get('horizontalListRenderer')
2570 if renderer:
2571 # TODO
2572 pass
8bdd16b4 2573
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own URL first, then its content.

    When the shelf endpoint resolves to a URL, a ``url_result`` for it is
    yielded, unless *skip_channels* is set and the URL contains
    ``/channels?``, in which case nothing at all is yielded. Afterwards
    the entries from ``_shelf_entries_from_content`` are yielded as well.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        is_channels_link = '/channels?' in shelf_url
        if skip_channels and is_channels_link:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL, so its inline content is extracted too
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2591
8bdd16b4 2592 def _playlist_entries(self, video_list_renderer):
2593 for content in video_list_renderer['contents']:
2594 if not isinstance(content, dict):
2595 continue
2596 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2597 if not isinstance(renderer, dict):
2598 continue
2599 video_id = renderer.get('videoId')
2600 if not video_id:
2601 continue
2602 yield self._extract_video(renderer)
07aeced6 2603
3d3dddc9 2604 r""" # Not needed in the new implementation
3462ffa8 2605 def _itemSection_entries(self, item_sect_renderer):
2606 for content in item_sect_renderer['contents']:
2607 if not isinstance(content, dict):
2608 continue
2609 renderer = content.get('videoRenderer', {})
2610 if not isinstance(renderer, dict):
2611 continue
2612 video_id = renderer.get('videoId')
2613 if not video_id:
2614 continue
2615 yield self._extract_video(renderer)
3d3dddc9 2616 """
3462ffa8 2617
def _rich_entries(self, rich_grid_renderer):
    """Yield the single video held in ``content.videoRenderer``, if any."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
2625
8bdd16b4 2626 def _video_entry(self, video_renderer):
2627 video_id = video_renderer.get('videoId')
2628 if video_id:
2629 return self._extract_video(video_renderer)
dacb3a86 2630
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (``backstageAttachment.videoRenderer``) is yielded
    first, followed by every inline link in the post text that YoutubeIE
    accepts. An inline link pointing at the attached video is skipped so
    the same video is not yielded twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
            # Remember the attachment's id so the inline-link loop below can
            # actually detect duplicates (it was previously always None).
            video_id = video_renderer.get('videoId')
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the linked video's own id, not the attachment's
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2659
8bdd16b4 2660 def _post_thread_continuation_entries(self, post_thread_continuation):
2661 contents = post_thread_continuation.get('contents')
2662 if not isinstance(contents, list):
2663 return
2664 for content in contents:
2665 renderer = content.get('backstagePostThreadRenderer')
2666 if not isinstance(renderer, dict):
2667 continue
2668 for entry in self._post_thread_entries(renderer):
2669 yield entry
07aeced6 2670
29f7c58a 2671 @staticmethod
2672 def _build_continuation_query(continuation, ctp=None):
2673 query = {
2674 'ctoken': continuation,
2675 'continuation': continuation,
2676 }
2677 if ctp:
2678 query['itct'] = ctp
2679 return query
2680
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns ``None`` when that dict or its ``continuation`` token is
    missing or empty.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, next_data.get('clickTrackingParams'))
    return None
c5e8d7af 2692
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or ``None``.

    ``nextContinuationData`` is preferred. Otherwise the ``contents`` and
    ``items`` lists are scanned for the first ``continuationItemRenderer``
    whose endpoint carries a ``continuationCommand.token``.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'],
            compat_str) if endpoint else None
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 2715
def _entries(self, tab, identity_token, item_id):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first. While a continuation query is
    available, further pages are requested through ``_call_api`` (with
    retries on HTTP 500/503/404 and on incomplete responses) and their
    entries are yielded too.

    *identity_token*, when truthy, is sent as ``x-youtube-identity-token``;
    *item_id* is only used to label the download notes.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised renderer of each item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries, which writes the
    # continuation it finds into slot 0 (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query={
                        'continuation': continuation['continuation'],
                        # 'itct' is only present when the continuation carried
                        # clickTrackingParams, so it must not be indexed directly
                        'clickTracking': {'clickTrackingParams': continuation.get('itct')},
                    },
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if response.get('continuationContents') or response.get('onResponseReceivedActions'):
                    break
                last_error = 'Incomplete data recieved'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # Response shape 1: a 'continuationContents' dict keyed by renderer type
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the shared slot so extract_entries can report a new one
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response shape 2: items appended through 'onResponseReceivedActions';
        # the handler is chosen from the renderer type of the first item and
        # is given all items wrapped under the key that handler reads
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (self._playlist_entries, 'contents'),
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither shape was recognised: stop paging
        break
9558dcec 2859
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab has a true ``selected`` flag.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2867
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar.

    Returns a dict with any of ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is ``None`` are left out. When
    several sidebar items name an owner, the last one wins.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue

        def browse_field(field, owner=owner):
            # String field of the owner's browse endpoint, or None
            return try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint'][field], compat_str)

        info['uploader'] = owner.get('text')
        info['uploader_id'] = browse_field('browseId')
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/', browse_field('canonicalBaseUrl'))
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 2890
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present, else
    from ``playlistMetadataRenderer``. The playlist title gets the
    selected tab's title appended, and the entries come from ``_entries``
    on the selected tab.
    """
    channel_name = channel_url = channel_id = None
    title = description = playlist_id = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)

    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Stays None for playlist pages; item_id is substituted below
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title = title + format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the uploader fields under the channel_* names
    channel_fields = {
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url'],
    }
    metadata.update(channel_fields)

    entries = self._entries(selected_tab, identity_token, playlist_id)
    return self.playlist_result(entries, **metadata)
73c4ac2c 2957
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for an inline (watch-page) playlist.

    When the playlist exposes its own URL and that URL differs from
    *url*, extraction is delegated to YoutubeTabIE through a
    ``url_result``. Otherwise the inline entries are returned directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Inline playlist rendition continuation does not always work
    # at Youtube side, so the regular tab-based playlist URL is
    # preferred whenever one is available.
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 2975
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs found under ``data['alerts']``.

    Each alert may contribute its ``text.simpleText`` and the text of
    every entry in ``text.runs``. Alerts without a truthy ``type``, and
    alert containers or values that are not dicts, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no 'type'/'text' to read; skip it
            # rather than failing on ``.get``.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
2993
def _extract_identity_token(self, webpage, item_id):
    """Return the ``ID_TOKEN`` value for *webpage*, or ``None``.

    The parsed ytcfg is consulted first; when it yields no token, the raw
    page is searched with a regex.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    cfg_token = (
        try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
        if ytcfg else None)
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3003
def _real_extract(self, url):
    """Extract a channel tab, playlist or (as a fallback) single video.

    The URL host is normalised to www.youtube.com. A bare channel/user URL
    is redirected to its ``/videos`` tab, and a ``watch`` URL without a
    video id is redirected to its playlist. The page is then downloaded,
    with retries while its initial data is incomplete, and dispatched to
    the tab, inline-playlist or video handling.

    Raises ExtractorError when YouTube reports an error alert or when the
    page cannot be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    retries = self._downloader.params.get('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data recieved'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        # The conditional must be parenthesised: it binds looser than '%',
        # so without the parentheses the first attempt got an empty note.
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                # Only the last error is raised; earlier ones become warnings
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if count >= retries:
            self._downloader.report_error(last_error)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3079
c5e8d7af 3080
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject anything YoutubeTabIE accepts; otherwise use the parent check."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical ``/playlist`` URL handled by YoutubeTabIE.

        The original query string is carried over; when there is none
        (e.g. a bare playlist id), ``list=<id>`` is used.
        """
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3155
3156
class YoutubeYtBeIE(InfoExtractor):
    """Match youtu.be short links that also carry a ``list=`` playlist ID.

    The link is expanded to a ``/watch`` URL with both the video and the
    playlist and handed to YoutubeTabIE through ``url_result``.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL for YoutubeTabIE."""
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist ID, not the video ID
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3195
3196
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand ``https://www.youtube.com/user/<name>`` to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3210
b05654f0 3211
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ``:ytfav`` shorthand to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the fixed ``list=LL`` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3229
3230
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword.

    Results are fetched page by page from the ``search`` API endpoint;
    subclasses may set ``_SEARCH_PARAMS`` to send a ``params`` value
    along with the query.
    """
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for ``query``, stopping after ``n`` results.

        Iteration also ends when a page cannot be downloaded, contains no
        result sections, or provides no continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        emitted = 0
        for page_num in itertools.count(1):
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=payload)
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list) if response else None
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer with an ID
                    if not (isinstance(renderer, dict) and renderer.get('videoId')):
                        continue

                    yield self._extract_video(renderer)
                    emitted += 1
                    if emitted == n:
                        return

            if not next_token:
                break
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 3299
c9ae7b95 3300
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' value of every search request.
    # NOTE(review): presumably the encoded "sort by upload date" filter - confirm
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3306
c9ae7b95 3307
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a ``youtube.com/results`` URL.

    The search terms come from the ``search_query`` (or ``q``) parameter
    and the optional ``sp`` parameter is used as the search params.
    """
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Extract all results for the search described by ``url``."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # An absent 'sp' yields '', which _entries treats as "no params"
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3333
3334
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_initialize(self):
        """Log in when the extractor is initialized."""
        self._login()

    def _real_extract(self, url):
        """Hand the ``/feed/<_FEED_NAME>`` URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3355
3356
class YoutubeWatchLaterIE(InfoExtractor):
    """Expand the ``:ytwatchlater`` shorthand to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the fixed ``list=WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3369
3370
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com front page as well as the ``:ytrec`` /
    ``:ytrecommended`` shorthands.
    """
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3385
1ed5b5c9 3386
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` shorthand)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3398
1ed5b5c9 3399
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory``)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3408
3409
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that were cut off before the ``v=`` parameter.

    The pattern only matches URLs ending right after ``watch?``, after a
    single auxiliary parameter (``feature``, ``annotation_id``, ``x-yt-cl``,
    ``hl`` or ``t``), or after an ``attribution_link?a=`` value. Such URLs
    carry no video ID, so extraction always fails with a hint about
    quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the truncation.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this program (yt-dlp) so that the
        # suggestion can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
772fd5cc
PH
3457
3458
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with a message reporting the incomplete ID.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial ID."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)