]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[gedi] Improvements from youtube-dl (#149)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c5e8d7af 28 clean_html,
c5e8d7af 29 ExtractorError,
b60419c5 30 format_field,
2d30521a 31 float_or_none,
dd27fd17 32 int_or_none,
94278f72 33 mimetype2ext,
6310acf5 34 parse_codecs,
7c80519c 35 parse_duration,
dca3ff4a 36 qualities,
3995d37d 37 remove_start,
cf7e015f 38 smuggle_url,
dbdaaa23 39 str_or_none,
c93d53f5 40 str_to_int,
556dbe7f 41 try_get,
c5e8d7af
PH
42 unescapeHTML,
43 unified_strdate,
cf7e015f 44 unsmuggle_url,
8bdd16b4 45 update_url_query,
21c340b8 46 url_or_none,
6e6bc8da 47 urlencode_postdata,
8bdd16b4 48 urljoin,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of reserved path names
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex matching playlist IDs (prefixed IDs as well as the fixed special lists)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list of url_result entries (ie 'Youtube'), one per video ID in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Failure is reported as False, as promised by the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus the f_req payload to url.

            Returns the parsed JSON response, or False if the request failed
            (the download is non-fatal).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' so the body parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the challenge payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than
            # 'if ... else', which would otherwise drop the prefix
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to InfoExtractor._download_webpage_handle with a private copy of 'query'.

        The query dict is copied so that the caller's dict is never shared
        with the parent implementation; a missing or None query becomes {}.
        """
        query = (kwargs.get('query') or {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Attempt to log in, unless no downloader is attached."""
        if self._downloader is None:
            return
        # The result is deliberately not acted upon: a failed login has
        # already been reported by _login() through warnings
        if not self._login():
            return

    # Base payload for _call_api(); top-level keys are overridden by the per-call query
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210301.08.00',
            }
        },
    }

    # Regexes capturing the JSON object literals embedded in a webpage
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Appended to the regexes above to anchor the end of the (lazily matched) object
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Return the 'SAPISIDHASH <time>_<sha1>' authorization value.

        The SHA-1 digest covers '<time> <SAPISID> https://www.youtube.com'.
        Returns None when there is no SAPISID cookie for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """POST a JSON request to the youtubei/v1/<ep> endpoint and return the parsed JSON.

        query is merged over a copy of _DEFAULT_API_DATA (top-level keys
        only). headers, if given, is copied rather than modified; the
        content-type and, when a SAPISID cookie is available, the
        Authorization/X-Origin headers are added to the copy.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)
        # Work on a copy so the caller's dict is not mutated
        headers = dict(headers or {})
        headers.update({'content-type': 'application/json'})
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=headers,
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Parse and return the ytInitialData JSON object found in webpage.

        The boundary-anchored pattern is tried first, then the bare pattern.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the object passed to ytcfg.set(...) in webpage; non-fatal.

        When no ytcfg.set call is found, '{}' is parsed instead.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result for YoutubeIE from a renderer dict.

        Fields missing from renderer come out as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first, then the leading run of digits/commas is taken
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
354
0c148415 355
360e1ca5 356class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 357 IE_DESC = 'YouTube.com'
bc2ca1bb 358 _INVIDIOUS_SITES = (
359 # invidious-redirect websites
360 r'(?:www\.)?redirect\.invidious\.io',
361 r'(?:(?:www|dev)\.)?invidio\.us',
362 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
363 r'(?:www\.)?invidious\.pussthecat\.org',
364 r'(?:www\.)?invidious\.048596\.xyz',
365 r'(?:www\.)?invidious\.zee\.li',
366 r'(?:www\.)?vid\.puffyan\.us',
367 r'(?:(?:www|au)\.)?ytprivate\.com',
368 r'(?:www\.)?invidious\.namazso\.eu',
369 r'(?:www\.)?invidious\.ethibox\.fr',
370 r'(?:www\.)?inv\.skyn3t\.in',
371 r'(?:www\.)?invidious\.himiko\.cloud',
372 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
373 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
374 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
375 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
376 # youtube-dl invidious instances list
377 r'(?:(?:www|no)\.)?invidiou\.sh',
378 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
379 r'(?:www\.)?invidious\.kabi\.tk',
380 r'(?:www\.)?invidious\.13ad\.de',
381 r'(?:www\.)?invidious\.mastodon\.host',
382 r'(?:www\.)?invidious\.zapashcanon\.fr',
383 r'(?:www\.)?invidious\.kavin\.rocks',
384 r'(?:www\.)?invidious\.tube',
385 r'(?:www\.)?invidiou\.site',
386 r'(?:www\.)?invidious\.site',
387 r'(?:www\.)?invidious\.xyz',
388 r'(?:www\.)?invidious\.nixnet\.xyz',
389 r'(?:www\.)?invidious\.drycat\.fr',
390 r'(?:www\.)?tube\.poal\.co',
391 r'(?:www\.)?tube\.connect\.cafe',
392 r'(?:www\.)?vid\.wxzm\.sx',
393 r'(?:www\.)?vid\.mint\.lgbt',
394 r'(?:www\.)?yewtu\.be',
395 r'(?:www\.)?yt\.elukerio\.org',
396 r'(?:www\.)?yt\.lelux\.fi',
397 r'(?:www\.)?invidious\.ggc-project\.de',
398 r'(?:www\.)?yt\.maisputain\.ovh',
399 r'(?:www\.)?invidious\.toot\.koeln',
400 r'(?:www\.)?invidious\.fdn\.fr',
401 r'(?:www\.)?watch\.nettohikari\.com',
402 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
403 r'(?:www\.)?qklhadlycap4cnod\.onion',
404 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
405 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
406 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
407 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
408 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
409 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
410 )
cb7dfeea 411 _VALID_URL = r"""(?x)^
c5e8d7af 412 (
edb53e2d 413 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 414 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
415 (?:www\.)?deturl\.com/www\.youtube\.com|
416 (?:www\.)?pwnyoutube\.com|
417 (?:www\.)?hooktube\.com|
418 (?:www\.)?yourepeat\.com|
419 tube\.majestyc\.net|
420 %(invidious)s|
421 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
422 (?:.*?\#/)? # handle anchor (#/) redirect urls
423 (?: # the various things that can precede the ID:
ac7553d0 424 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 425 |(?: # or the v= param in all its forms
f7000f3a 426 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 427 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 428 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
429 v=
430 )
f4b05232 431 ))
cbaed4bb
S
432 |(?:
433 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
434 vid\.plus| # or vid.plus/xxxx
435 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 436 %(invidious)s
cbaed4bb 437 )/
edb53e2d 438 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 439 )
c5e8d7af 440 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 441 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
442 (?!.*?\blist=
443 (?:
444 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
445 WL # WL are handled by the watch later IE
446 )
447 )
c5e8d7af 448 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 449 $""" % {
450 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
451 'invidious': '|'.join(_INVIDIOUS_SITES),
452 }
e40c758c 453 _PLAYER_INFO_RE = (
cc2db878 454 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
455 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 456 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 457 )
2c62dc26 458 _formats = {
c2d3cb4c 459 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
460 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
462 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
463 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
464 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
465 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 467 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 468 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
469 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
470 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
472 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 474 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 475 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
476 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 477
478
479 # 3D videos
c2d3cb4c 480 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
481 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
483 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 484 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
485 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
486 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 487
96fb5605 488 # Apple HTTP Live Streaming
11f12195 489 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 490 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
491 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
492 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
494 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 495 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
496 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
497
498 # DASH mp4 video
d23028a8
S
499 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
500 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 504 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
505 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
506 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
509 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 511
f6f1fc92 512 # Dash mp4 audio
d23028a8
S
513 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
514 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
515 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
516 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
517 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
519 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
520
521 # Dash webm
d23028a8
S
522 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
523 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
529 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
530 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 537 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
538 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
539 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
540 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
543 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
544
545 # Dash webm audio
d23028a8
S
546 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
547 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 548
0857baad 549 # Dash webm audio with opus inside
d23028a8
S
550 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
551 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
552 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 553
ce6b9a2d
PH
554 # RTMP (unnamed)
555 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
556
557 # av01 video only formats sometimes served with "unknown" codecs
558 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
559 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 562 }
29f7c58a 563 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 564
fd5c4aab
S
565 _GEO_BYPASS = False
566
78caa52a 567 IE_NAME = 'youtube'
2eb88d95
PH
568 _TESTS = [
569 {
2d3d2997 570 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
571 'info_dict': {
572 'id': 'BaW_jenozKc',
573 'ext': 'mp4',
3867038a 574 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
575 'uploader': 'Philipp Hagemeister',
576 'uploader_id': 'phihag',
ec85ded8 577 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
578 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
579 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 580 'upload_date': '20121002',
3867038a 581 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 582 'categories': ['Science & Technology'],
3867038a 583 'tags': ['youtube-dl'],
556dbe7f 584 'duration': 10,
dbdaaa23 585 'view_count': int,
3e7c1224
PH
586 'like_count': int,
587 'dislike_count': int,
7c80519c 588 'start_time': 1,
297a564b 589 'end_time': 9,
2eb88d95 590 }
0e853ca4 591 },
fccd3771 592 {
4bc3a23e
PH
593 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
594 'note': 'Embed-only video (#1746)',
595 'info_dict': {
596 'id': 'yZIXLfi8CZQ',
597 'ext': 'mp4',
598 'upload_date': '20120608',
599 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
600 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
601 'uploader': 'SET India',
94bfcd23 602 'uploader_id': 'setindia',
ec85ded8 603 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 604 'age_limit': 18,
545cc85d 605 },
606 'skip': 'Private video',
fccd3771 607 },
11b56058 608 {
8bdd16b4 609 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
610 'note': 'Use the first video ID in the URL',
611 'info_dict': {
612 'id': 'BaW_jenozKc',
613 'ext': 'mp4',
3867038a 614 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
615 'uploader': 'Philipp Hagemeister',
616 'uploader_id': 'phihag',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 618 'upload_date': '20121002',
3867038a 619 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 620 'categories': ['Science & Technology'],
3867038a 621 'tags': ['youtube-dl'],
556dbe7f 622 'duration': 10,
dbdaaa23 623 'view_count': int,
11b56058
PM
624 'like_count': int,
625 'dislike_count': int,
34a7de29
S
626 },
627 'params': {
628 'skip_download': True,
629 },
11b56058 630 },
dd27fd17 631 {
2d3d2997 632 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
633 'note': '256k DASH audio (format 141) via DASH manifest',
634 'info_dict': {
635 'id': 'a9LDPn-MO4I',
636 'ext': 'm4a',
637 'upload_date': '20121002',
638 'uploader_id': '8KVIDEO',
ec85ded8 639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
640 'description': '',
641 'uploader': '8KVIDEO',
642 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 643 },
4bc3a23e
PH
644 'params': {
645 'youtube_include_dash_manifest': True,
646 'format': '141',
4919603f 647 },
de3c7fe0 648 'skip': 'format 141 not served anymore',
dd27fd17 649 },
8bdd16b4 650 # DASH manifest with encrypted signature
651 {
652 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
653 'info_dict': {
654 'id': 'IB3lcPjvWLA',
655 'ext': 'm4a',
656 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
657 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
658 'duration': 244,
659 'uploader': 'AfrojackVEVO',
660 'uploader_id': 'AfrojackVEVO',
661 'upload_date': '20131011',
cc2db878 662 'abr': 129.495,
8bdd16b4 663 },
664 'params': {
665 'youtube_include_dash_manifest': True,
666 'format': '141/bestaudio[ext=m4a]',
667 },
668 },
aa79ac0c
PH
669 # Controversy video
670 {
671 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
672 'info_dict': {
673 'id': 'T4XJQO3qol8',
674 'ext': 'mp4',
556dbe7f 675 'duration': 219,
aa79ac0c 676 'upload_date': '20100909',
4fe54c12 677 'uploader': 'Amazing Atheist',
aa79ac0c 678 'uploader_id': 'TheAmazingAtheist',
ec85ded8 679 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 680 'title': 'Burning Everyone\'s Koran',
545cc85d 681 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 682 }
c522adb1 683 },
dd2d55f1 684 # Normal age-gate video (embed allowed)
c522adb1 685 {
2d3d2997 686 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
687 'info_dict': {
688 'id': 'HtVdAasjOgU',
689 'ext': 'mp4',
690 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 691 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 692 'duration': 142,
c522adb1
JMF
693 'uploader': 'The Witcher',
694 'uploader_id': 'WitcherGame',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 696 'upload_date': '20140605',
34952f09 697 'age_limit': 18,
c522adb1
JMF
698 },
699 },
8bdd16b4 700 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
701 # YouTube Red ad is not captured for creator
702 {
703 'url': '__2ABJjxzNo',
704 'info_dict': {
705 'id': '__2ABJjxzNo',
706 'ext': 'mp4',
707 'duration': 266,
708 'upload_date': '20100430',
709 'uploader_id': 'deadmau5',
710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 711 'creator': 'deadmau5',
712 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 713 'uploader': 'deadmau5',
714 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 715 'alt_title': 'Some Chords',
8bdd16b4 716 },
717 'expected_warnings': [
718 'DASH manifest missing',
719 ]
720 },
067aa17e 721 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
722 {
723 'url': 'lqQg6PlCWgI',
724 'info_dict': {
725 'id': 'lqQg6PlCWgI',
726 'ext': 'mp4',
556dbe7f 727 'duration': 6085,
90227264 728 'upload_date': '20150827',
cbe2bd91 729 'uploader_id': 'olympic',
ec85ded8 730 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 731 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 732 'uploader': 'Olympic',
cbe2bd91
PH
733 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
734 },
735 'params': {
736 'skip_download': 'requires avconv',
e52a40ab 737 }
cbe2bd91 738 },
6271f1ca
PH
739 # Non-square pixels
740 {
741 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
742 'info_dict': {
743 'id': '_b-2C3KPAM0',
744 'ext': 'mp4',
745 'stretched_ratio': 16 / 9.,
556dbe7f 746 'duration': 85,
6271f1ca
PH
747 'upload_date': '20110310',
748 'uploader_id': 'AllenMeow',
ec85ded8 749 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 750 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 751 'uploader': '孫ᄋᄅ',
6271f1ca
PH
752 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
753 },
06b491eb
S
754 },
755 # url_encoded_fmt_stream_map is empty string
756 {
757 'url': 'qEJwOuvDf7I',
758 'info_dict': {
759 'id': 'qEJwOuvDf7I',
f57b7835 760 'ext': 'webm',
06b491eb
S
761 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
762 'description': '',
763 'upload_date': '20150404',
764 'uploader_id': 'spbelect',
765 'uploader': 'Наблюдатели Петербурга',
766 },
767 'params': {
768 'skip_download': 'requires avconv',
e323cf3f
S
769 },
770 'skip': 'This live event has ended.',
06b491eb 771 },
067aa17e 772 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
773 {
774 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
775 'info_dict': {
776 'id': 'FIl7x6_3R5Y',
eb6793ba 777 'ext': 'webm',
da77d856
S
778 'title': 'md5:7b81415841e02ecd4313668cde88737a',
779 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 780 'duration': 220,
da77d856
S
781 'upload_date': '20150625',
782 'uploader_id': 'dorappi2000',
ec85ded8 783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 784 'uploader': 'dorappi2000',
eb6793ba 785 'formats': 'mincount:31',
da77d856 786 },
eb6793ba 787 'skip': 'not actual anymore',
2ee8f5d8 788 },
8a1a26ce
YCH
789 # DASH manifest with segment_list
790 {
791 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
792 'md5': '8ce563a1d667b599d21064e982ab9e31',
793 'info_dict': {
794 'id': 'CsmdDsKjzN8',
795 'ext': 'mp4',
17ee98e1 796 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
797 'uploader': 'Airtek',
798 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
799 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
800 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
801 },
802 'params': {
803 'youtube_include_dash_manifest': True,
804 'format': '135', # bestvideo
be49068d
S
805 },
806 'skip': 'This live event has ended.',
2ee8f5d8 807 },
cf7e015f
S
808 {
809 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 810 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 811 'info_dict': {
545cc85d 812 'id': 'jvGDaLqkpTg',
813 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
814 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
815 },
816 'playlist': [{
817 'info_dict': {
545cc85d 818 'id': 'jvGDaLqkpTg',
cf7e015f 819 'ext': 'mp4',
545cc85d 820 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
821 'description': 'md5:e03b909557865076822aa169218d6a5d',
822 'duration': 10643,
823 'upload_date': '20161111',
824 'uploader': 'Team PGP',
825 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
826 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
827 },
828 }, {
829 'info_dict': {
545cc85d 830 'id': '3AKt1R1aDnw',
cf7e015f 831 'ext': 'mp4',
545cc85d 832 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
833 'description': 'md5:e03b909557865076822aa169218d6a5d',
834 'duration': 10991,
835 'upload_date': '20161111',
836 'uploader': 'Team PGP',
837 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
839 },
840 }, {
841 'info_dict': {
545cc85d 842 'id': 'RtAMM00gpVc',
cf7e015f 843 'ext': 'mp4',
545cc85d 844 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
845 'description': 'md5:e03b909557865076822aa169218d6a5d',
846 'duration': 10995,
847 'upload_date': '20161111',
848 'uploader': 'Team PGP',
849 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
850 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
851 },
852 }, {
853 'info_dict': {
545cc85d 854 'id': '6N2fdlP3C5U',
cf7e015f 855 'ext': 'mp4',
545cc85d 856 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
857 'description': 'md5:e03b909557865076822aa169218d6a5d',
858 'duration': 10990,
859 'upload_date': '20161111',
860 'uploader': 'Team PGP',
861 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
862 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
863 },
864 }],
865 'params': {
866 'skip_download': True,
867 },
cbaed4bb 868 },
f9f49d87 869 {
067aa17e 870 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
871 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
872 'info_dict': {
873 'id': 'gVfLd0zydlo',
874 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
875 },
876 'playlist_count': 2,
be49068d 877 'skip': 'Not multifeed anymore',
f9f49d87 878 },
cbaed4bb 879 {
2d3d2997 880 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 881 'only_matching': True,
0e49d9a6 882 },
6d4fc66b 883 {
2d3d2997 884 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
885 'only_matching': True,
886 },
0e49d9a6 887 {
067aa17e 888 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 889 # Also tests cut-off URL expansion in video description (see
067aa17e
S
890 # https://github.com/ytdl-org/youtube-dl/issues/1892,
891 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
892 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
893 'info_dict': {
894 'id': 'lsguqyKfVQg',
895 'ext': 'mp4',
896 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 897 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 898 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 899 'duration': 133,
0e49d9a6
LL
900 'upload_date': '20151119',
901 'uploader_id': 'IronSoulElf',
ec85ded8 902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 903 'uploader': 'IronSoulElf',
eb6793ba
S
904 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
905 'track': 'Dark Walk - Position Music',
906 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 907 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
908 },
909 'params': {
910 'skip_download': True,
911 },
912 },
61f92af1 913 {
067aa17e 914 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
915 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
916 'only_matching': True,
917 },
313dfc45
LL
918 {
919 # Video with yt:stretch=17:0
920 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
921 'info_dict': {
922 'id': 'Q39EVAstoRM',
923 'ext': 'mp4',
924 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
925 'description': 'md5:ee18a25c350637c8faff806845bddee9',
926 'upload_date': '20151107',
927 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
928 'uploader': 'CH GAMER DROID',
929 },
930 'params': {
931 'skip_download': True,
932 },
be49068d 933 'skip': 'This video does not exist.',
313dfc45 934 },
7caf9830
S
935 {
936 # Video licensed under Creative Commons
937 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
938 'info_dict': {
939 'id': 'M4gD1WSo5mA',
940 'ext': 'mp4',
941 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
942 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 943 'duration': 721,
7caf9830
S
944 'upload_date': '20150127',
945 'uploader_id': 'BerkmanCenter',
ec85ded8 946 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 947 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
948 'license': 'Creative Commons Attribution license (reuse allowed)',
949 },
950 'params': {
951 'skip_download': True,
952 },
953 },
fd050249
S
954 {
955 # Channel-like uploader_url
956 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
957 'info_dict': {
958 'id': 'eQcmzGIKrzg',
959 'ext': 'mp4',
960 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 961 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 962 'duration': 4060,
fd050249 963 'upload_date': '20151119',
eb6793ba 964 'uploader': 'Bernie Sanders',
fd050249 965 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 966 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
967 'license': 'Creative Commons Attribution license (reuse allowed)',
968 },
969 'params': {
970 'skip_download': True,
971 },
972 },
040ac686
S
973 {
974 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
975 'only_matching': True,
7f29cf54
S
976 },
977 {
067aa17e 978 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
979 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
980 'only_matching': True,
6496ccb4
S
981 },
982 {
983 # Rental video preview
984 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
985 'info_dict': {
986 'id': 'uGpuVWrhIzE',
987 'ext': 'mp4',
988 'title': 'Piku - Trailer',
989 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
990 'upload_date': '20150811',
991 'uploader': 'FlixMatrix',
992 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 993 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
994 'license': 'Standard YouTube License',
995 },
996 'params': {
997 'skip_download': True,
998 },
eb6793ba 999 'skip': 'This video is not available.',
022a5d66 1000 },
12afdc2a
S
1001 {
1002 # YouTube Red video with episode data
1003 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1004 'info_dict': {
1005 'id': 'iqKdEhx-dD4',
1006 'ext': 'mp4',
1007 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1008 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1009 'duration': 2085,
12afdc2a
S
1010 'upload_date': '20170118',
1011 'uploader': 'Vsauce',
1012 'uploader_id': 'Vsauce',
1013 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1014 'series': 'Mind Field',
1015 'season_number': 1,
1016 'episode_number': 1,
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
1021 'expected_warnings': [
1022 'Skipping DASH manifest',
1023 ],
1024 },
c7121fa7
S
1025 {
1026 # The following content has been identified by the YouTube community
1027 # as inappropriate or offensive to some audiences.
1028 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1029 'info_dict': {
1030 'id': '6SJNVb0GnPI',
1031 'ext': 'mp4',
1032 'title': 'Race Differences in Intelligence',
1033 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1034 'duration': 965,
1035 'upload_date': '20140124',
1036 'uploader': 'New Century Foundation',
1037 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1038 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1039 },
1040 'params': {
1041 'skip_download': True,
1042 },
545cc85d 1043 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1044 },
022a5d66
S
1045 {
1046 # itag 212
1047 'url': '1t24XAntNCY',
1048 'only_matching': True,
fd5c4aab
S
1049 },
1050 {
1051 # geo restricted to JP
1052 'url': 'sJL6WA-aGkQ',
1053 'only_matching': True,
1054 },
cd5a74a2
S
1055 {
1056 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1057 'only_matching': True,
1058 },
bc2ca1bb 1059 {
1060 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1061 'only_matching': True,
1062 },
1063 {
1064 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1065 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1066 'only_matching': True,
1067 },
825cd268
RA
1068 {
1069 # DRM protected
1070 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1071 'only_matching': True,
4fe54c12
S
1072 },
1073 {
1074 # Video with unsupported adaptive stream type formats
1075 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1076 'info_dict': {
1077 'id': 'Z4Vy8R84T1U',
1078 'ext': 'mp4',
1079 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1080 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1081 'duration': 433,
1082 'upload_date': '20130923',
1083 'uploader': 'Amelia Putri Harwita',
1084 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1085 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1086 'formats': 'maxcount:10',
1087 },
1088 'params': {
1089 'skip_download': True,
1090 'youtube_include_dash_manifest': False,
1091 },
5429d6a9 1092 'skip': 'not actual anymore',
5caabd3c 1093 },
1094 {
822b9d9c 1095 # Youtube Music Auto-generated description
5caabd3c 1096 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1097 'info_dict': {
1098 'id': 'MgNrAu2pzNs',
1099 'ext': 'mp4',
1100 'title': 'Voyeur Girl',
1101 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1102 'upload_date': '20190312',
5429d6a9
S
1103 'uploader': 'Stephen - Topic',
1104 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1105 'artist': 'Stephen',
1106 'track': 'Voyeur Girl',
1107 'album': 'it\'s too much love to know my dear',
1108 'release_date': '20190313',
1109 'release_year': 2019,
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
1114 },
66b48727
RA
1115 {
1116 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1117 'only_matching': True,
1118 },
011e75e6
S
1119 {
1120 # invalid -> valid video id redirection
1121 'url': 'DJztXj2GPfl',
1122 'info_dict': {
1123 'id': 'DJztXj2GPfk',
1124 'ext': 'mp4',
1125 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1126 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1127 'upload_date': '20090125',
1128 'uploader': 'Prochorowka',
1129 'uploader_id': 'Prochorowka',
1130 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1131 'artist': 'Panjabi MC',
1132 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1133 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1134 },
1135 'params': {
1136 'skip_download': True,
1137 },
545cc85d 1138 'skip': 'Video unavailable',
ea74e00b
DP
1139 },
1140 {
1141 # empty description results in an empty string
1142 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1143 'info_dict': {
1144 'id': 'x41yOUIvK2k',
1145 'ext': 'mp4',
1146 'title': 'IMG 3456',
1147 'description': '',
1148 'upload_date': '20170613',
1149 'uploader_id': 'ElevageOrVert',
1150 'uploader': 'ElevageOrVert',
1151 },
1152 'params': {
1153 'skip_download': True,
1154 },
1155 },
a0566bbf 1156 {
29f7c58a 1157 # with '};' inside yt initial data (see [1])
1158 # see [2] for an example with '};' inside ytInitialPlayerResponse
1159 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1160 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1161 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1162 'info_dict': {
1163 'id': 'CHqg6qOn4no',
1164 'ext': 'mp4',
1165 'title': 'Part 77 Sort a list of simple types in c#',
1166 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1167 'upload_date': '20130831',
1168 'uploader_id': 'kudvenkat',
1169 'uploader': 'kudvenkat',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 },
1174 },
29f7c58a 1175 {
1176 # another example of '};' in ytInitialData
1177 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1178 'only_matching': True,
1179 },
1180 {
1181 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1182 'only_matching': True,
1183 },
545cc85d 1184 {
cc2db878 1185 # https://github.com/ytdl-org/youtube-dl/pull/28094
1186 'url': 'OtqTfy26tG0',
1187 'info_dict': {
1188 'id': 'OtqTfy26tG0',
1189 'ext': 'mp4',
1190 'title': 'Burn Out',
1191 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1192 'upload_date': '20141120',
1193 'uploader': 'The Cinematic Orchestra - Topic',
1194 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1196 'artist': 'The Cinematic Orchestra',
1197 'track': 'Burn Out',
1198 'album': 'Every Day',
1199 'release_data': None,
1200 'release_year': None,
1201 },
1202 'params': {
1203 'skip_download': True,
1204 },
545cc85d 1205 },
bc2ca1bb 1206 {
1207 # controversial video, only works with bpctr when authenticated with cookies
1208 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1209 'only_matching': True,
1210 },
2eb88d95
PH
1211 ]
1212
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and create the per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: downloaded player code, keyed by player id.
    # _player_cache: signature functions, keyed by (player_url, signature shape).
    self._code_cache, self._player_cache = {}, {}
e0df6211 1217
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the shape of a signature: the lengths of its dot-separated
    parts, themselves joined with dots (e.g. 'abc.de' -> '3.2')."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1221
e40c758c
S
1222 @classmethod
1223 def _extract_player_info(cls, player_url):
1224 for player_re in cls._PLAYER_INFO_RE:
1225 id_m = re.search(player_re, player_url)
1226 if id_m:
1227 break
1228 else:
c081b35c 1229 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1230 return id_m.group('id')
e40c758c
S
1231
1232 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1233 player_id = self._extract_player_info(player_url)
e0df6211 1234
c4417ddb 1235 # Read from filesystem cache
545cc85d 1236 func_id = 'js_%s_%s' % (
1237 player_id, self._signature_cache_id(example_sig))
c4417ddb 1238 assert os.path.basename(func_id) == func_id
a0e07d31 1239
69ea8ca4 1240 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1241 if cache_spec is not None:
78caa52a 1242 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1243
545cc85d 1244 if player_id not in self._code_cache:
1245 self._code_cache[player_id] = self._download_webpage(
e0df6211 1246 player_url, video_id,
545cc85d 1247 note='Downloading player ' + player_id,
69ea8ca4 1248 errnote='Download of %s failed' % player_url)
545cc85d 1249 code = self._code_cache[player_id]
1250 res = self._parse_sig_js(code)
e0df6211 1251
785521bf
PH
1252 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1253 cache_res = res(test_string)
1254 cache_spec = [ord(c) for c in cache_res]
83799698 1255
69ea8ca4 1256 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1257 return res
1258
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run on a probe string whose characters are the code points
    0 .. len(example_sig) - 1, which reveals the input index used for each
    output position. Runs of indices that go up or down by one are emitted
    as slices, the rest as single subscripts. The resulting snippet, guarded
    by a check on the dot-separated part lengths of the signature, is shown
    via self.to_screen().
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs and
        # the trailing code would read an unbound `i`; handle these directly.
        if not idxs:
            yield "''"
            return
        if len(idxs) == 1:
            yield 's[%d]' % idxs[0]
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = [compat_str(len(p)) for p in example_sig.split('.')]
    # A one-element tuple literal needs a trailing comma: '(5)' is an int and
    # would never compare equal to the tuple built by the generated code.
    signature_id_tuple = '(%s%s)' % (
        ', '.join(part_lengths), ',' if len(part_lengths) == 1 else '')
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1297
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JavaScript *jscode* and
    return a Python callable wrapping it.

    The function name is taken from the 'sig' group of the first pattern
    that matches; the function itself is extracted with JSInterpreter and
    is called with the signature as its only argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted JS function takes its arguments as a list.
        return initial_function([s])
    return run_signature_function
1321
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    *player_url* may be protocol-relative or relative to
    https://www.youtube.com; it is made absolute before use. Signature
    functions are cached per (player URL, signature shape).

    Raises ExtractorError when *player_url* is None or when anything goes
    wrong while obtaining or running the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback.
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1348
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    The URL is read from playbackTracking.videostatsPlaybackUrl.baseUrl,
    gets 'ver' and 'cpn' query parameters set, and is then fetched
    non-fatally. Nothing happens when no valid URL is present.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1373
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in the HTML of *webpage*.

    Three sources are scanned, in this order:
      * player URLs (youtube.com / youtube-nocookie.com, /embed/, /v/ or
        /p/ paths) in iframe/embed/object tags, data-video-url attributes
        and embedSWF()/SWFObject() calls;
      * lazyYT elements (value of data-youtube-id);
      * WordPress "YouTube Video Importer" players (value of data-video_id).

    Entries from the first source are URLs; the other two contribute the
    raw attribute values.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' and so could never match `embedSWF("...")`.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; the id is the last group.
    entries.extend(m[-1] for m in matches)

    return entries
1405
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls() in
    *webpage*, or None when there is none."""
    for first_entry in YoutubeIE._extract_urls(webpage):
        return first_entry
    return None
1410
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. the second group of cls._VALID_URL
    (matched in verbose mode) for *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1418
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapters from the player-bar section of *data*.

    Each chapter is a dict with 'start_time', 'end_time' (both in seconds)
    and 'title'. A chapter ends where the next one starts; the last one
    ends at *duration*. Chapters whose start or end time is unavailable
    are left out. Returns None when *data* holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def start_seconds(entry):
        # timeRangeStartMillis is in milliseconds; convert to seconds.
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    chapter_count = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = start_seconds(chapter)
        if start_time is None:
            continue
        if index + 1 < chapter_count:
            end_time = start_seconds(chapters_list[index + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1458
545cc85d 1459 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1460 return self._parse_json(self._search_regex(
1461 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1462 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1463
c5e8d7af 1464 def _real_extract(self, url):
cf7e015f 1465 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1466 video_id = self._match_id(url)
1467 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1468 webpage_url = base_url + 'watch?v=' + video_id
1469 webpage = self._download_webpage(
1470 webpage_url + '&has_verified=1&bpctr=9999999999',
1471 video_id, fatal=False)
545cc85d 1472
1473 player_response = None
1474 if webpage:
1475 player_response = self._extract_yt_initial_variable(
1476 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1477 video_id, 'initial player response')
1478 if not player_response:
1479 player_response = self._call_api(
1480 'player', {'videoId': video_id}, video_id)
1481
1482 playability_status = player_response.get('playabilityStatus') or {}
1483 if playability_status.get('reason') == 'Sign in to confirm your age':
1484 pr = self._parse_json(try_get(compat_parse_qs(
1485 self._download_webpage(
1486 base_url + 'get_video_info', video_id,
1487 'Refetching age-gated info webpage',
1488 'unable to download video info webpage', query={
1489 'video_id': video_id,
7c60c33e 1490 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1491 }, fatal=False)),
1492 lambda x: x['player_response'][0],
1493 compat_str) or '{}', video_id)
1494 if pr:
1495 player_response = pr
1496
1497 trailer_video_id = try_get(
1498 playability_status,
1499 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1500 compat_str)
1501 if trailer_video_id:
1502 return self.url_result(
1503 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1504
545cc85d 1505 def get_text(x):
1506 if not x:
c2d125d9 1507 return
545cc85d 1508 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1509
545cc85d 1510 search_meta = (
1511 lambda x: self._html_search_meta(x, webpage, default=None)) \
1512 if webpage else lambda x: None
dbdaaa23 1513
545cc85d 1514 video_details = player_response.get('videoDetails') or {}
37357d21 1515 microformat = try_get(
545cc85d 1516 player_response,
1517 lambda x: x['microformat']['playerMicroformatRenderer'],
1518 dict) or {}
1519 video_title = video_details.get('title') \
1520 or get_text(microformat.get('title')) \
1521 or search_meta(['og:title', 'twitter:title', 'title'])
1522 video_description = video_details.get('shortDescription')
cf7e015f 1523
8fe10494 1524 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1525 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1526 multifeed_metadata_list = try_get(
1527 player_response,
1528 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1529 compat_str)
8fe10494
S
1530 if multifeed_metadata_list:
1531 entries = []
1532 feed_ids = []
1533 for feed in multifeed_metadata_list.split(','):
1534 # Unquote should take place before split on comma (,) since textual
1535 # fields may contain comma as well (see
067aa17e 1536 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1537 feed_data = compat_parse_qs(
1538 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1539
def feed_entry(name):
    # Look up the first value stored under `name` in the parsed feed query
    # (`feed_data` comes from the enclosing loop). The lookup is delegated to
    # try_get with compat_str as the expected type.
    def first_value(data):
        return data[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
1543
1544 feed_id = feed_entry('id')
1545 if not feed_id:
1546 continue
1547 feed_title = feed_entry('title')
1548 title = video_title
1549 if feed_title:
1550 title += ' (%s)' % feed_title
8fe10494
S
1551 entries.append({
1552 '_type': 'url_transparent',
1553 'ie_key': 'Youtube',
1554 'url': smuggle_url(
545cc85d 1555 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1556 {'force_singlefeed': True}),
6b09401b 1557 'title': title,
8fe10494 1558 })
6b09401b 1559 feed_ids.append(feed_id)
8fe10494
S
1560 self.to_screen(
1561 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1562 % (', '.join(feed_ids), video_id))
545cc85d 1563 return self.playlist_result(
1564 entries, video_id, video_title, video_description)
8fe10494
S
1565 else:
1566 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1567
545cc85d 1568 formats = []
1569 itags = []
cc2db878 1570 itag_qualities = {}
545cc85d 1571 player_url = None
dca3ff4a 1572 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1573 streaming_data = player_response.get('streamingData') or {}
1574 streaming_formats = streaming_data.get('formats') or []
1575 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1576 for fmt in streaming_formats:
1577 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1578 continue
321bf820 1579
cc2db878 1580 itag = str_or_none(fmt.get('itag'))
1581 quality = fmt.get('quality')
1582 if itag and quality:
1583 itag_qualities[itag] = quality
1584 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1585 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1586 # number of fragments that would subsequently be requested with (`&sq=N`)
1587 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1588 continue
1589
545cc85d 1590 fmt_url = fmt.get('url')
1591 if not fmt_url:
1592 sc = compat_parse_qs(fmt.get('signatureCipher'))
1593 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1594 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1595 if not (sc and fmt_url and encrypted_sig):
1596 continue
1597 if not player_url:
1598 if not webpage:
1599 continue
1600 player_url = self._search_regex(
1601 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1602 webpage, 'player URL', fatal=False)
1603 if not player_url:
201e9eaa 1604 continue
545cc85d 1605 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1606 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1607 fmt_url += '&' + sp + '=' + signature
1608
545cc85d 1609 if itag:
1610 itags.append(itag)
cc2db878 1611 tbr = float_or_none(
1612 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1613 dct = {
1614 'asr': int_or_none(fmt.get('audioSampleRate')),
1615 'filesize': int_or_none(fmt.get('contentLength')),
1616 'format_id': itag,
1617 'format_note': fmt.get('qualityLabel') or quality,
1618 'fps': int_or_none(fmt.get('fps')),
1619 'height': int_or_none(fmt.get('height')),
dca3ff4a 1620 'quality': q(quality),
cc2db878 1621 'tbr': tbr,
545cc85d 1622 'url': fmt_url,
1623 'width': fmt.get('width'),
1624 }
1625 mimetype = fmt.get('mimeType')
1626 if mimetype:
1627 mobj = re.match(
1628 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1629 if mobj:
1630 dct['ext'] = mimetype2ext(mobj.group(1))
1631 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1632 no_audio = dct.get('acodec') == 'none'
1633 no_video = dct.get('vcodec') == 'none'
1634 if no_audio:
1635 dct['vbr'] = tbr
1636 if no_video:
1637 dct['abr'] = tbr
1638 if no_audio or no_video:
545cc85d 1639 dct['downloader_options'] = {
1640 # Youtube throttles chunks >~10M
1641 'http_chunk_size': 10485760,
bf1317d2 1642 }
7c60c33e 1643 if dct.get('ext'):
1644 dct['container'] = dct['ext'] + '_dash'
545cc85d 1645 formats.append(dct)
1646
1647 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1648 if hls_manifest_url:
1649 for f in self._extract_m3u8_formats(
1650 hls_manifest_url, video_id, 'mp4', fatal=False):
1651 itag = self._search_regex(
1652 r'/itag/(\d+)', f['url'], 'itag', default=None)
1653 if itag:
1654 f['format_id'] = itag
1655 formats.append(f)
1656
1657 if self._downloader.params.get('youtube_include_dash_manifest'):
1658 dash_manifest_url = streaming_data.get('dashManifestUrl')
1659 if dash_manifest_url:
545cc85d 1660 for f in self._extract_mpd_formats(
1661 dash_manifest_url, video_id, fatal=False):
cc2db878 1662 itag = f['format_id']
1663 if itag in itags:
1664 continue
dca3ff4a 1665 if itag in itag_qualities:
1666 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1667 # but kept to maintain feature parity (and code similarity) with youtube-dl
1668 # Remove if this causes any issues with sorting in future
1669 f['quality'] = q(itag_qualities[itag])
545cc85d 1670 filesize = int_or_none(self._search_regex(
1671 r'/clen/(\d+)', f.get('fragment_base_url')
1672 or f['url'], 'file size', default=None))
1673 if filesize:
1674 f['filesize'] = filesize
cc2db878 1675 formats.append(f)
bf1317d2 1676
545cc85d 1677 if not formats:
63ad4d43 1678 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1679 raise ExtractorError(
1680 'This video is DRM protected.', expected=True)
1681 pemr = try_get(
1682 playability_status,
1683 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1684 dict) or {}
1685 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1686 subreason = pemr.get('subreason')
1687 if subreason:
1688 subreason = clean_html(get_text(subreason))
1689 if subreason == 'The uploader has not made this video available in your country.':
1690 countries = microformat.get('availableCountries')
1691 if not countries:
1692 regions_allowed = search_meta('regionsAllowed')
1693 countries = regions_allowed.split(',') if regions_allowed else None
1694 self.raise_geo_restricted(
1695 subreason, countries)
1696 reason += '\n' + subreason
1697 if reason:
1698 raise ExtractorError(reason, expected=True)
bf1317d2 1699
545cc85d 1700 self._sort_formats(formats)
bf1317d2 1701
545cc85d 1702 keywords = video_details.get('keywords') or []
1703 if not keywords and webpage:
1704 keywords = [
1705 unescapeHTML(m.group('content'))
1706 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1707 for keyword in keywords:
1708 if keyword.startswith('yt:stretch='):
1709 w, h = keyword.split('=')[1].split(':')
1710 w, h = int(w), int(h)
1711 if w > 0 and h > 0:
1712 ratio = w / h
1713 for f in formats:
1714 if f.get('vcodec') != 'none':
1715 f['stretched_ratio'] = ratio
6449cd80 1716
545cc85d 1717 thumbnails = []
1718 for container in (video_details, microformat):
1719 for thumbnail in (try_get(
1720 container,
1721 lambda x: x['thumbnail']['thumbnails'], list) or []):
1722 thumbnail_url = thumbnail.get('url')
1723 if not thumbnail_url:
bf1317d2 1724 continue
545cc85d 1725 thumbnails.append({
1726 'height': int_or_none(thumbnail.get('height')),
1727 'url': thumbnail_url,
1728 'width': int_or_none(thumbnail.get('width')),
1729 })
1730 if thumbnails:
1731 break
a6211d23 1732 else:
545cc85d 1733 thumbnail = search_meta(['og:image', 'twitter:image'])
1734 if thumbnail:
1735 thumbnails = [{'url': thumbnail}]
1736
1737 category = microformat.get('category') or search_meta('genre')
1738 channel_id = video_details.get('channelId') \
1739 or microformat.get('externalChannelId') \
1740 or search_meta('channelId')
1741 duration = int_or_none(
1742 video_details.get('lengthSeconds')
1743 or microformat.get('lengthSeconds')) \
1744 or parse_duration(search_meta('duration'))
1745 is_live = video_details.get('isLive')
1746 owner_profile_url = microformat.get('ownerProfileUrl')
1747
1748 info = {
1749 'id': video_id,
1750 'title': self._live_title(video_title) if is_live else video_title,
1751 'formats': formats,
1752 'thumbnails': thumbnails,
1753 'description': video_description,
1754 'upload_date': unified_strdate(
1755 microformat.get('uploadDate')
1756 or search_meta('uploadDate')),
1757 'uploader': video_details['author'],
1758 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1759 'uploader_url': owner_profile_url,
1760 'channel_id': channel_id,
1761 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1762 'duration': duration,
1763 'view_count': int_or_none(
1764 video_details.get('viewCount')
1765 or microformat.get('viewCount')
1766 or search_meta('interactionCount')),
1767 'average_rating': float_or_none(video_details.get('averageRating')),
1768 'age_limit': 18 if (
1769 microformat.get('isFamilySafe') is False
1770 or search_meta('isFamilyFriendly') == 'false'
1771 or search_meta('og:restrictions:age') == '18+') else 0,
1772 'webpage_url': webpage_url,
1773 'categories': [category] if category else None,
1774 'tags': keywords,
1775 'is_live': is_live,
1776 'playable_in_embed': playability_status.get('playableInEmbed'),
f76ede8e 1777 'was_live': video_details.get('isLiveContent')
545cc85d 1778 }
b477fc13 1779
545cc85d 1780 pctr = try_get(
1781 player_response,
1782 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1783 subtitles = {}
1784 if pctr:
def process_language(container, base_url, lang_code, query):
    """Register the subtitle variants of one language in ``container``.

    For every format in ``self._SUBTITLE_FORMATS`` an entry with ``ext`` and
    ``url`` is built; the URL is ``base_url`` with ``query`` plus
    ``fmt=<format>`` applied through ``update_url_query``. The resulting list
    is stored under ``container[lang_code]``.

    ``query`` is left untouched: each format works on its own copy, so the
    caller's dict does not end up carrying the last ``fmt`` value.
    """
    lang_subs = []
    for fmt in self._SUBTITLE_FORMATS:
        fmt_query = dict(query)
        fmt_query['fmt'] = fmt
        lang_subs.append({
            'ext': fmt,
            'url': update_url_query(base_url, fmt_query),
        })
    container[lang_code] = lang_subs
7e72694b 1796
545cc85d 1797 for caption_track in (pctr.get('captionTracks') or []):
1798 base_url = caption_track.get('baseUrl')
1799 if not base_url:
1800 continue
1801 if caption_track.get('kind') != 'asr':
1802 lang_code = caption_track.get('languageCode')
1803 if not lang_code:
1804 continue
1805 process_language(
1806 subtitles, base_url, lang_code, {})
1807 continue
1808 automatic_captions = {}
1809 for translation_language in (pctr.get('translationLanguages') or []):
1810 translation_language_code = translation_language.get('languageCode')
1811 if not translation_language_code:
1812 continue
1813 process_language(
1814 automatic_captions, base_url, translation_language_code,
1815 {'tlang': translation_language_code})
1816 info['automatic_captions'] = automatic_captions
1817 info['subtitles'] = subtitles
7e72694b 1818
545cc85d 1819 parsed_url = compat_urllib_parse_urlparse(url)
1820 for component in [parsed_url.fragment, parsed_url.query]:
1821 query = compat_parse_qs(component)
1822 for k, v in query.items():
1823 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1824 d_k += '_time'
1825 if d_k not in info and k in s_ks:
1826 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1827
1828 # Youtube Music Auto-generated description
822b9d9c 1829 if video_description:
38d70284 1830 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1831 if mobj:
822b9d9c
RA
1832 release_year = mobj.group('release_year')
1833 release_date = mobj.group('release_date')
1834 if release_date:
1835 release_date = release_date.replace('-', '')
1836 if not release_year:
545cc85d 1837 release_year = release_date[:4]
1838 info.update({
1839 'album': mobj.group('album'.strip()),
1840 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1841 'track': mobj.group('track').strip(),
1842 'release_date': release_date,
cc2db878 1843 'release_year': int_or_none(release_year),
545cc85d 1844 })
7e72694b 1845
545cc85d 1846 initial_data = None
1847 if webpage:
1848 initial_data = self._extract_yt_initial_variable(
1849 webpage, self._YT_INITIAL_DATA_RE, video_id,
1850 'yt initial data')
1851 if not initial_data:
1852 initial_data = self._call_api(
1853 'next', {'videoId': video_id}, video_id, fatal=False)
1854
1855 if not is_live:
1856 try:
1857 # This will error if there is no livechat
1858 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1859 info['subtitles']['live_chat'] = [{
1860 'video_id': video_id,
1861 'ext': 'json',
1862 'protocol': 'youtube_live_chat_replay',
1863 }]
1864 except (KeyError, IndexError, TypeError):
1865 pass
1866
1867 if initial_data:
1868 chapters = self._extract_chapters_from_json(
1869 initial_data, video_id, duration)
1870 if not chapters:
1871 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1872 contents = try_get(
1873 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1874 list)
1875 if not contents:
1876 continue
1877
def chapter_time(mmlir):
    """Return the parsed ``timeDescription`` of a macro-marker item.

    ``mmlir`` may be None: the caller feeds in the result of a ``try_get``
    lookup for the following item. In that case None is returned (instead of
    failing on ``None.get``), which the caller already treats as "skip this
    chapter".
    """
    if not mmlir:
        return None
    return parse_duration(get_text(mmlir.get('timeDescription')))
1881
1882 chapters = []
1883 for next_num, content in enumerate(contents, start=1):
1884 mmlir = content.get('macroMarkersListItemRenderer') or {}
1885 start_time = chapter_time(mmlir)
1886 end_time = chapter_time(try_get(
1887 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1888 if next_num < len(contents) else duration
1889 if start_time is None or end_time is None:
1890 continue
1891 chapters.append({
1892 'start_time': start_time,
1893 'end_time': end_time,
1894 'title': get_text(mmlir.get('title')),
1895 })
1896 if chapters:
1897 break
1898 if chapters:
1899 info['chapters'] = chapters
1900
1901 contents = try_get(
1902 initial_data,
1903 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1904 list) or []
1905 for content in contents:
1906 vpir = content.get('videoPrimaryInfoRenderer')
1907 if vpir:
1908 stl = vpir.get('superTitleLink')
1909 if stl:
1910 stl = get_text(stl)
1911 if try_get(
1912 vpir,
1913 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1914 info['location'] = stl
1915 else:
1916 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1917 if mobj:
1918 info.update({
1919 'series': mobj.group(1),
1920 'season_number': int(mobj.group(2)),
1921 'episode_number': int(mobj.group(3)),
1922 })
1923 for tlb in (try_get(
1924 vpir,
1925 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1926 list) or []):
1927 tbr = tlb.get('toggleButtonRenderer') or {}
1928 for getter, regex in [(
1929 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1930 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1931 lambda x: x['accessibility'],
1932 lambda x: x['accessibilityData']['accessibilityData'],
1933 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1934 label = (try_get(tbr, getter, dict) or {}).get('label')
1935 if label:
1936 mobj = re.match(regex, label)
1937 if mobj:
1938 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1939 break
1940 sbr_tooltip = try_get(
1941 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1942 if sbr_tooltip:
1943 like_count, dislike_count = sbr_tooltip.split(' / ')
1944 info.update({
1945 'like_count': str_to_int(like_count),
1946 'dislike_count': str_to_int(dislike_count),
1947 })
1948 vsir = content.get('videoSecondaryInfoRenderer')
1949 if vsir:
1950 info['channel'] = get_text(try_get(
1951 vsir,
1952 lambda x: x['owner']['videoOwnerRenderer']['title'],
1953 compat_str))
1954 rows = try_get(
1955 vsir,
1956 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1957 list) or []
1958 multiple_songs = False
1959 for row in rows:
1960 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1961 multiple_songs = True
1962 break
1963 for row in rows:
1964 mrr = row.get('metadataRowRenderer') or {}
1965 mrr_title = mrr.get('title')
1966 if not mrr_title:
1967 continue
1968 mrr_title = get_text(mrr['title'])
1969 mrr_contents_text = get_text(mrr['contents'][0])
1970 if mrr_title == 'License':
1971 info['license'] = mrr_contents_text
1972 elif not multiple_songs:
1973 if mrr_title == 'Album':
1974 info['album'] = mrr_contents_text
1975 elif mrr_title == 'Artist':
1976 info['artist'] = mrr_contents_text
1977 elif mrr_title == 'Song':
1978 info['track'] = mrr_contents_text
1979
1980 fallbacks = {
1981 'channel': 'uploader',
1982 'channel_id': 'uploader_id',
1983 'channel_url': 'uploader_url',
1984 }
1985 for to, frm in fallbacks.items():
1986 if not info.get(to):
1987 info[to] = info.get(frm)
1988
1989 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1990 v = info.get(s_k)
1991 if v:
1992 info[d_k] = v
b84071c0 1993
06167fbb 1994 # get xsrf for annotations or comments
1995 get_annotations = self._downloader.params.get('writeannotations', False)
1996 get_comments = self._downloader.params.get('getcomments', False)
1997 if get_annotations or get_comments:
29f7c58a 1998 xsrf_token = None
545cc85d 1999 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2000 if ytcfg:
2001 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2002 if not xsrf_token:
2003 xsrf_token = self._search_regex(
2004 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2005 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2006
2007 # annotations
06167fbb 2008 if get_annotations:
64b6a4e9
RA
2009 invideo_url = try_get(
2010 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2011 if xsrf_token and invideo_url:
29f7c58a 2012 xsrf_field_name = None
2013 if ytcfg:
2014 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2015 if not xsrf_field_name:
2016 xsrf_field_name = self._search_regex(
2017 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2018 webpage, 'xsrf field name',
29f7c58a 2019 group='xsrf_field_name', default='session_token')
8a784c74 2020 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2021 self._proto_relative_url(invideo_url),
2022 video_id, note='Downloading annotations',
2023 errnote='Unable to download video annotations', fatal=False,
2024 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2025
06167fbb 2026 # Get comments
2027 # TODO: Refactor and move to separate function
277d6ff5 2028 def extract_comments():
06167fbb 2029 expected_video_comment_count = 0
2030 video_comments = []
277d6ff5 2031 comment_xsrf = xsrf_token
06167fbb 2032
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text following ``key`` in ``html``, up to ``separator``.

    The value is taken to start ``num_chars`` characters after the end of the
    first occurrence of ``key`` and to end right before the next
    ``separator``.

    Returns '' when ``key`` does not occur. When no ``separator`` follows the
    start of the value, everything up to the end of ``html`` is returned.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find signals "not found" with -1; using that in the offset
        # arithmetic would slice an unrelated part of the document.
        return ''
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # Same -1 pitfall: html[pos_begin:-1] would drop the last character.
        return html[pos_begin:]
    return html[pos_begin:pos_end]
2037
def search_dict(partial, key):
    """Yield every value stored under ``key`` anywhere inside ``partial``.

    Dicts and lists are walked depth-first in iteration order. A value found
    under ``key`` is yielded as-is and not searched any further; anything
    that is neither a dict nor a list yields nothing.
    """
    if isinstance(partial, list):
        for element in partial:
            for found in search_dict(element, key):
                yield found
        return
    if not isinstance(partial, dict):
        return
    for name, value in partial.items():
        if name == key:
            yield value
            continue
        for found in search_dict(value, key):
            yield found
2050
8a784c74 2051 continuations = []
2052 if initial_data:
2053 try:
2054 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2055 continuations = [ncd['continuation']]
2056 # Handle videos where comments have been disabled entirely
2057 except StopIteration:
2058 pass
06167fbb 2059
def get_continuation(continuation, session_token, replies=False):
    """Fetch one page of comments (or comment replies) for a continuation.

    POSTs ``session_token`` to the comment service endpoint with
    ``continuation`` as ``ctoken``; ``replies`` selects the reply action
    instead of the top-level comment action.

    Returns the parsed JSON body on HTTP 200 and None on HTTP 413 (which is
    declared as an expected status for the request). Any other status code
    that reaches this point raises ExtractorError.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    query = {
        'pbj': 1,
        'ctoken': continuation,
    }
    query[action] = 1

    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01'
    }
    post_data = urlencode_postdata({
        'session_token': session_token
    })

    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=post_data,
        query=query,
        headers=request_headers)

    response_code = handle.getcode()
    if response_code == 200:
        return self._parse_json(content, video_id)
    if response_code == 413:
        return None
    raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2094
2095 first_continuation = True
885d36d4 2096 chain_msg = ''
2097 self.to_screen('Downloading comments')
06167fbb 2098 while continuations:
885d36d4 2099 continuation = continuations.pop()
277d6ff5 2100 comment_response = get_continuation(continuation, comment_xsrf)
06167fbb 2101 if not comment_response:
2102 continue
2103 if list(search_dict(comment_response, 'externalErrorMessage')):
2104 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2105
8d0ea5f9
B
2106 if 'continuationContents' not in comment_response['response']:
2107 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2108 continue
2109 # not sure if this actually helps
2110 if 'xsrf_token' in comment_response:
277d6ff5 2111 comment_xsrf = comment_response['xsrf_token']
8d0ea5f9 2112
06167fbb 2113 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2114 if first_continuation:
2115 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2116 first_continuation = False
2117 if 'contents' not in item_section:
2118 # continuation returned no comments?
2119 # set an empty array as to not break the for loop
2120 item_section['contents'] = []
2121
2122 for meta_comment in item_section['contents']:
2123 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2124 video_comments.append({
2125 'id': comment['commentId'],
ba7bf12d 2126 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2127 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2128 'author': comment.get('authorText', {}).get('simpleText', ''),
2129 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2130 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2131 'parent': 'root'
2132 })
2133 if 'replies' not in meta_comment['commentThreadRenderer']:
2134 continue
2135
8d0ea5f9
B
2136 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2137 while reply_continuations:
06167fbb 2138 time.sleep(1)
8d0ea5f9 2139 continuation = reply_continuations.pop()
277d6ff5 2140 replies_data = get_continuation(continuation, comment_xsrf, True)
06167fbb 2141 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2142 continue
06167fbb 2143
2144 if self._downloader.params.get('verbose', False):
885d36d4 2145 chain_msg = ' (chain %s)' % comment['commentId']
2146 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2147 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2148 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2149 reply_comment = reply_meta['commentRenderer']
2150 video_comments.append({
2151 'id': reply_comment['commentId'],
2152 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2153 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2154 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2155 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2156 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2157 'parent': comment['commentId']
2158 })
2159 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2160 continue
8d0ea5f9 2161 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2162
885d36d4 2163 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2164 if 'continuations' in item_section:
8d0ea5f9 2165 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2166 time.sleep(1)
2167
885d36d4 2168 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
277d6ff5 2169 return {
545cc85d 2170 'comments': video_comments,
2171 'comment_count': expected_video_comment_count
277d6ff5 2172 }
2173
2174 if get_comments:
2175 info['__post_extractor'] = extract_comments
4ea3be0a 2176
545cc85d 2177 self.mark_watched(video_id, player_response)
d77ab8e2 2178
545cc85d 2179 return info
c5e8d7af 2180
5f6a1245 2181
8bdd16b4 2182class YoutubeTabIE(YoutubeBaseInfoExtractor):
2183 IE_DESC = 'YouTube.com tab'
70d5c17b 2184 _VALID_URL = r'''(?x)
2185 https?://
2186 (?:\w+\.)?
2187 (?:
2188 youtube(?:kids)?\.com|
2189 invidio\.us
2190 )/
2191 (?:
2192 (?:channel|c|user)/|
2193 (?P<not_channel>
9ba5705a 2194 feed/|hashtag/|
70d5c17b 2195 (?:playlist|watch)\?.*?\blist=
2196 )|
29f7c58a 2197 (?!(?:%s)\b) # Direct URLs
70d5c17b 2198 )
2199 (?P<id>[^/?\#&]+)
2200 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2201 IE_NAME = 'youtube:tab'
2202
81127aa5 2203 _TESTS = [{
8bdd16b4 2204 # playlists, multipage
2205 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2206 'playlist_mincount': 94,
2207 'info_dict': {
2208 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2209 'title': 'Игорь Клейнер - Playlists',
2210 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2211 'uploader': 'Игорь Клейнер',
2212 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2213 },
2214 }, {
2215 # playlists, multipage, different order
2216 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2217 'playlist_mincount': 94,
2218 'info_dict': {
2219 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2220 'title': 'Игорь Клейнер - Playlists',
2221 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2222 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2223 'uploader': 'Игорь Клейнер',
8bdd16b4 2224 },
2225 }, {
2226 # playlists, singlepage
2227 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2228 'playlist_mincount': 4,
2229 'info_dict': {
2230 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2231 'title': 'ThirstForScience - Playlists',
2232 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2233 'uploader': 'ThirstForScience',
2234 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2235 }
2236 }, {
2237 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2238 'only_matching': True,
2239 }, {
2240 # basic, single video playlist
0e30a7b9 2241 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2242 'info_dict': {
0e30a7b9 2243 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2244 'uploader': 'Sergey M.',
2245 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2246 'title': 'youtube-dl public playlist',
81127aa5 2247 },
0e30a7b9 2248 'playlist_count': 1,
9291475f 2249 }, {
8bdd16b4 2250 # empty playlist
0e30a7b9 2251 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2252 'info_dict': {
0e30a7b9 2253 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2254 'uploader': 'Sergey M.',
2255 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2256 'title': 'youtube-dl empty playlist',
9291475f
PH
2257 },
2258 'playlist_count': 0,
2259 }, {
8bdd16b4 2260 # Home tab
2261 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2262 'info_dict': {
8bdd16b4 2263 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2264 'title': 'lex will - Home',
2265 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2266 'uploader': 'lex will',
2267 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2268 },
8bdd16b4 2269 'playlist_mincount': 2,
9291475f 2270 }, {
8bdd16b4 2271 # Videos tab
2272 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2273 'info_dict': {
8bdd16b4 2274 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2275 'title': 'lex will - Videos',
2276 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2277 'uploader': 'lex will',
2278 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2279 },
8bdd16b4 2280 'playlist_mincount': 975,
9291475f 2281 }, {
8bdd16b4 2282 # Videos tab, sorted by popular
2283 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2284 'info_dict': {
8bdd16b4 2285 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2286 'title': 'lex will - Videos',
2287 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2288 'uploader': 'lex will',
2289 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2290 },
8bdd16b4 2291 'playlist_mincount': 199,
9291475f 2292 }, {
8bdd16b4 2293 # Playlists tab
2294 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2295 'info_dict': {
8bdd16b4 2296 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2297 'title': 'lex will - Playlists',
2298 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2299 'uploader': 'lex will',
2300 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2301 },
8bdd16b4 2302 'playlist_mincount': 17,
ac7553d0 2303 }, {
8bdd16b4 2304 # Community tab
2305 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2306 'info_dict': {
8bdd16b4 2307 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2308 'title': 'lex will - Community',
2309 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2310 'uploader': 'lex will',
2311 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2312 },
2313 'playlist_mincount': 18,
87dadd45 2314 }, {
8bdd16b4 2315 # Channels tab
2316 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2317 'info_dict': {
8bdd16b4 2318 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2319 'title': 'lex will - Channels',
2320 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2321 'uploader': 'lex will',
2322 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2323 },
deaec5af 2324 'playlist_mincount': 12,
6b08cdf6 2325 }, {
a0566bbf 2326 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2327 'only_matching': True,
2328 }, {
a0566bbf 2329 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2330 'only_matching': True,
2331 }, {
a0566bbf 2332 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2333 'only_matching': True,
2334 }, {
2335 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2336 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2337 'info_dict': {
2338 'title': '29C3: Not my department',
2339 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2340 'uploader': 'Christiaan008',
2341 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2342 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2343 },
2344 'playlist_count': 96,
2345 }, {
2346 'note': 'Large playlist',
2347 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2348 'info_dict': {
8bdd16b4 2349 'title': 'Uploads from Cauchemar',
2350 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2351 'uploader': 'Cauchemar',
2352 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2353 },
8bdd16b4 2354 'playlist_mincount': 1123,
2355 }, {
2356 # even larger playlist, 8832 videos
2357 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2358 'only_matching': True,
4b7df0d3
JMF
2359 }, {
2360 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2361 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2362 'info_dict': {
acf757f4
PH
2363 'title': 'Uploads from Interstellar Movie',
2364 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2365 'uploader': 'Interstellar Movie',
8bdd16b4 2366 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2367 },
481cc733 2368 'playlist_mincount': 21,
8bdd16b4 2369 }, {
2370 # https://github.com/ytdl-org/youtube-dl/issues/21844
2371 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2372 'info_dict': {
2373 'title': 'Data Analysis with Dr Mike Pound',
2374 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2375 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2376 'uploader': 'Computerphile',
deaec5af 2377 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2378 },
2379 'playlist_mincount': 11,
2380 }, {
a0566bbf 2381 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2382 'only_matching': True,
dacb3a86
S
2383 }, {
2384 # Playlist URL that does not actually serve a playlist
2385 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2386 'info_dict': {
2387 'id': 'FqZTN594JQw',
2388 'ext': 'webm',
2389 'title': "Smiley's People 01 detective, Adventure Series, Action",
2390 'uploader': 'STREEM',
2391 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2392 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2393 'upload_date': '20150526',
2394 'license': 'Standard YouTube License',
2395 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2396 'categories': ['People & Blogs'],
2397 'tags': list,
dbdaaa23 2398 'view_count': int,
dacb3a86
S
2399 'like_count': int,
2400 'dislike_count': int,
2401 },
2402 'params': {
2403 'skip_download': True,
2404 },
13a75688 2405 'skip': 'This video is not available.',
dacb3a86 2406 'add_ie': [YoutubeIE.ie_key()],
481cc733 2407 }, {
8bdd16b4 2408 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2409 'only_matching': True,
66b48727 2410 }, {
8bdd16b4 2411 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2412 'only_matching': True,
a0566bbf 2413 }, {
2414 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2415 'info_dict': {
2416 'id': '9Auq9mYxFEE',
2417 'ext': 'mp4',
deaec5af 2418 'title': compat_str,
a0566bbf 2419 'uploader': 'Sky News',
2420 'uploader_id': 'skynews',
2421 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2422 'upload_date': '20191102',
deaec5af 2423 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2424 'categories': ['News & Politics'],
2425 'tags': list,
2426 'like_count': int,
2427 'dislike_count': int,
2428 },
2429 'params': {
2430 'skip_download': True,
2431 },
2432 }, {
2433 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2434 'info_dict': {
2435 'id': 'a48o2S1cPoo',
2436 'ext': 'mp4',
2437 'title': 'The Young Turks - Live Main Show',
2438 'uploader': 'The Young Turks',
2439 'uploader_id': 'TheYoungTurks',
2440 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2441 'upload_date': '20150715',
2442 'license': 'Standard YouTube License',
2443 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2444 'categories': ['News & Politics'],
2445 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2446 'like_count': int,
2447 'dislike_count': int,
2448 },
2449 'params': {
2450 'skip_download': True,
2451 },
2452 'only_matching': True,
2453 }, {
2454 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2455 'only_matching': True,
2456 }, {
2457 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2458 'only_matching': True,
3d3dddc9 2459 }, {
2460 'url': 'https://www.youtube.com/feed/trending',
2461 'only_matching': True,
2462 }, {
2463 # needs auth
2464 'url': 'https://www.youtube.com/feed/library',
2465 'only_matching': True,
2466 }, {
2467 # needs auth
2468 'url': 'https://www.youtube.com/feed/history',
2469 'only_matching': True,
2470 }, {
2471 # needs auth
2472 'url': 'https://www.youtube.com/feed/subscriptions',
2473 'only_matching': True,
2474 }, {
2475 # needs auth
2476 'url': 'https://www.youtube.com/feed/watch_later',
2477 'only_matching': True,
2478 }, {
2479 # no longer available?
2480 'url': 'https://www.youtube.com/feed/recommended',
2481 'only_matching': True,
29f7c58a 2482 }, {
2483 # inline playlist with not always working continuations
2484 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2485 'only_matching': True,
2486 }, {
2487 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2488 'only_matching': True,
2489 }, {
2490 'url': 'https://www.youtube.com/course',
2491 'only_matching': True,
2492 }, {
2493 'url': 'https://www.youtube.com/zsecurity',
2494 'only_matching': True,
2495 }, {
2496 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2497 'only_matching': True,
2498 }, {
2499 'url': 'https://www.youtube.com/TheYoungTurks/live',
2500 'only_matching': True,
2501 }]
2502
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always left to YoutubeIE; everything
    else is decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2507
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The ``channelId`` meta tag is preferred.  When it is absent, the id is
    parsed out of the first available canonical-URL meta tag (og:url,
    al:*:url, twitter:*), which is expected to look like
    ``https://www.youtube.com/channel/<id>``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to live inside the capturing group; with
    # ``([^/?#&])+`` only the last character of the id would be captured.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2520
8bdd16b4 2521 @staticmethod
cd7c66cf 2522 def _extract_basic_item_renderer(item):
2523 # Modified from _extract_grid_item_renderer
2524 known_renderers = (
2525 'playlistRenderer', 'videoRenderer', 'channelRenderer'
2526 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2527 )
2528 for key, renderer in item.items():
2529 if key not in known_renderers:
2530 continue
2531 return renderer
8bdd16b4 2532
def _grid_entries(self, grid_renderer):
    """Yield entries for every recognised item in ``grid_renderer['items']``.

    A renderer may produce up to three results, in this order: a playlist
    URL result (``playlistId``), an extracted video (``videoId``) and a
    channel URL result (``channelId``).
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        runs_title = try_get(
            renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=playlist_id, video_title=runs_title)

        # video
        if renderer.get('videoId'):
            yield self._extract_video(renderer)

        # channel (its title is stored as simpleText rather than runs)
        channel_id = renderer.get('channelId')
        if channel_id:
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % channel_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(),
                video_title=channel_title)
2561
3d3dddc9 2562 def _shelf_entries_from_content(self, shelf_renderer):
2563 content = shelf_renderer.get('content')
2564 if not isinstance(content, dict):
8bdd16b4 2565 return
cd7c66cf 2566 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2567 if renderer:
2568 # TODO: add support for nested playlists so each shelf is processed
2569 # as separate playlist
2570 # TODO: this includes only first N items
2571 for entry in self._grid_entries(renderer):
2572 yield entry
2573 renderer = content.get('horizontalListRenderer')
2574 if renderer:
2575 # TODO
2576 pass
8bdd16b4 2577
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a URL result pointing at it is
    yielded first (unless *skip_channels* is set and the URL is a
    ``/channels?`` link, in which case nothing at all is yielded).  The
    entries found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        is_channels_link = '/channels?' in shelf_url
        if skip_channels and is_channels_link:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, so also extract from its content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2595
8bdd16b4 2596 def _playlist_entries(self, video_list_renderer):
2597 for content in video_list_renderer['contents']:
2598 if not isinstance(content, dict):
2599 continue
2600 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2601 if not isinstance(renderer, dict):
2602 continue
2603 video_id = renderer.get('videoId')
2604 if not video_id:
2605 continue
2606 yield self._extract_video(renderer)
07aeced6 2607
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a videoId."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2615
8bdd16b4 2616 def _video_entry(self, video_renderer):
2617 video_id = video_renderer.get('videoId')
2618 if video_id:
2619 return self._extract_video(video_renderer)
dacb3a86 2620
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) is yielded first, followed by a URL result
    for each inline link in the post text that YoutubeIE can handle.  A
    link that points at the attached video is skipped so the same video is
    not yielded twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment's id so the inline-link loop below can
    # actually detect duplicates.
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not of the attachment.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2649
8bdd16b4 2650 def _post_thread_continuation_entries(self, post_thread_continuation):
2651 contents = post_thread_continuation.get('contents')
2652 if not isinstance(contents, list):
2653 return
2654 for content in contents:
2655 renderer = content.get('backstagePostThreadRenderer')
2656 if not isinstance(renderer, dict):
2657 continue
2658 for entry in self._post_thread_entries(renderer):
2659 yield entry
07aeced6 2660
29f7c58a 2661 @staticmethod
2662 def _build_continuation_query(continuation, ctp=None):
2663 query = {
2664 'ctoken': continuation,
2665 'continuation': continuation,
2666 }
2667 if ctp:
2668 query['itct'] = ctp
2669 return query
2670
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``continuations[0].nextContinuationData``.

    None is returned when that data, or its ``continuation`` token, is
    missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2682
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first; otherwise the renderer's
    ``contents`` and ``items`` lists are scanned for the first
    ``continuationItemRenderer`` carrying a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 2705
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first.  While a
    continuation is available, further pages are then requested through
    the ``browse`` API endpoint and their entries are yielded too.

    *item_id* is only used to label the page downloads.  *identity_token*
    and *account_syncid*, when given, are sent as request headers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below parent_renderer and records the next
        # continuation (if any) in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    for page_num in itertools.count(1):
        if not continuation:
            break

        # 'itct' is only present when the continuation came with click
        # tracking params (see _build_continuation_query), so it must not
        # be indexed unconditionally.
        query = {'continuation': continuation['continuation']}
        ctp = continuation.get('itct')
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if response.get('continuationContents') or response.get('onResponseReceivedActions'):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # Old style response: a single "...Continuation" renderer
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset, so that a stale continuation is not picked up again
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # New style response: a list of items appended to the page.  The
        # type of the first item selects the handler and the key under
        # which the handler expects to find the items.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 2857
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2865
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar.

    Returns a dict holding whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be found (keys with a None value are dropped).
    When several sidebar items name an owner, the last one wins.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or ():
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 2888
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel or playlist).

    Metadata comes from ``channelMetadataRenderer`` or, failing that, from
    ``playlistMetadataRenderer``.  The entries are produced lazily from the
    currently selected tab.
    """
    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    raw_thumbnails = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only well-formed thumbnails that carry a valid URL
    thumbnails = []
    for thumb in raw_thumbnails:
        if not isinstance(thumb, dict):
            continue
        thumb_url = url_or_none(thumb.get('url'))
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title = title + format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # Not a channel page: fall back to the playlist owner
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid)
    return self.playlist_result(entries, **metadata)
73c4ac2c 2958
cd7c66cf 2959 def _extract_mix_playlist(self, playlist, playlist_id):
2960 page_num = 0
2961 while True:
2962 videos = list(self._playlist_entries(playlist))
2963 if not videos:
2964 return
2965 video_count = len(videos)
2966 start = min(video_count - 24, 26) if video_count > 25 else 0
2967 for item in videos[start:]:
2968 yield item
2969
2970 page_num += 1
2971 _, data = self._extract_webpage(
2972 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, videos[-1]['id']),
2973 '%s page %d' % (playlist_id, page_num))
2974 playlist = try_get(
2975 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
2976
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Build the result for a playlist embedded in a watch page.

    When the playlist has its own URL that differs from *url*, extraction
    is delegated to that URL; otherwise the playlist is extracted here as
    a mix playlist.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 2994
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in *data*.

    Non-error alerts, and every error alert except the last one, are
    reported as warnings.  The last error alert, if there is one, is
    raised as an ExtractorError whose ``expected`` flag is *expected*.
    """

    def iter_alerts():
        # Yields (alert type, message) pairs; an alert yields one pair for
        # its simpleText and one for each text run that has a message.
        for alert_group in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_group, dict):
                continue
            for alert in alert_group.values():
                kind = alert.get('type')
                if not kind:
                    continue
                simple_text = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if simple_text:
                    yield kind, simple_text
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    run_text = try_get(run, lambda x: x['text'], compat_str)
                    if run_text:
                        yield kind, run_text

    pending_error = None
    for alert_type, alert_message in iter_alerts():
        if alert_type.lower() != 'error':
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
            continue
        if pending_error:
            # Only the last error is raised; earlier ones become warnings
            self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', pending_error))
        pending_error = alert_message

    if pending_error:
        raise ExtractorError('YouTube said: %s' % pending_error, expected=expected)
02ced43c 3024
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN of the page, or None when there is none.

    The token is taken from the extracted ytcfg when available; otherwise
    the raw *webpage* is searched for it.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3034
d069eca7
M
@staticmethod
def _extract_account_syncid(data):
    """Extract syncId required to download private playlists of secondary channels

    Returns None when *data* holds no datasyncId or when the datasyncId
    has no second component.
    """
    datasync_id = try_get(
        data,
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        compat_str) or ''
    parts = datasync_id.split("||")
    # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
    # and just "user_syncid||" for primary channel. We only want the channel_syncid
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[0]
3045
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page.  The download
    is repeated, up to the ``extractor_retries`` parameter (default 3)
    additional times, until the data contains ``contents`` or
    ``currentVideoEndpoint``.  If it never does, an error is reported
    through the downloader and the last page and data are returned.

    Alerts found in the data are handled by ``_extract_alerts`` with
    ``expected=True``, so an error alert raises ExtractorError.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data recieved'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            # Every attempt but the first one is a retry
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if count >= retries:
            # Out of attempts: report the problem, but still return what we got
            self._downloader.report_error(last_error)
    return webpage, data
3066
def _real_extract(self, url):
    """Extract a tab page, playlist, or single video from *url*.

    The host of *url* is normalised to www.youtube.com first.  Depending
    on what the downloaded page contains, the result is built from its
    tabs, from its embedded playlist, or is a plain video URL result.

    Raises ExtractorError when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    mobj = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    mobj = mobj.groupdict() if mobj else {}
    # NOTE(review): 'not_channel' is presumably a named group of _VALID_URL
    # (defined outside this method) - confirm
    if mobj and not mobj.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and (mobj.get('not_channel') or '').startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        # The URL names both a video and a playlist: honour --no-playlist
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # 1) A page with tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # 2) A watch page with an embedded playlist
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # 3) Just a video; prefer the id reported by the page over the one in the URL
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3120
c5e8d7af 3121
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for playlists given as a bare playlist id or as a URL with a ``list=`` parameter.

    No page is downloaded here: the input is rewritten into a canonical
    https://www.youtube.com/playlist URL and handed over to YoutubeTabIE.
    """
    IE_DESC = 'YouTube.com playlists'
    # The whole URL prefix is optional, so a bare playlist id matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL YoutubeTabIE accepts; otherwise defer to the inherited check."""
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a URL result delegating the playlist to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not qs:
            # No query string (e.g. a bare playlist id was given): build one from the id
            qs = {'list': playlist_id}
        # Any query parameters of the original URL are carried over
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3196
3197
class YoutubeYtBeIE(InfoExtractor):
    """Handle ``youtu.be/<video>?list=<playlist>`` short links by delegating to YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL carrying both video and playlist IDs."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist ID, not the video ID.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3236
3237
class YoutubeYtUserIE(InfoExtractor):
    """Translate the ``ytuser:<name>`` keyword into a ``/user/<name>`` URL for YoutubeTabIE."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the user page URL from the matched name and delegate to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3251
b05654f0 3252
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of shortcuts onto the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3270
3271
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor for the ``ytsearch`` keyword, paging through the ``search`` API endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for ``query``, stopping once ``n`` have been produced.

        Pages are requested one after another; iteration also ends when a
        page yields no response, no section list, or no continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        # The section list lives in a different place on the first page than
        # on continuation pages; try_get tries each path in turn.
        section_paths = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
        )

        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=payload)
            if not response:
                return
            sections = try_get(response, section_paths, list)
            if not sections:
                return

            # Promoted content can shift where the videos and the token sit,
            # so every section is inspected instead of relying on fixed indices.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer with an ID.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist result wrapping up to ``n`` search entries for ``query``."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3340
c9ae7b95 3341
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the ``ytsearchdate`` keyword with its own search params."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the ``params`` field of every search request.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3347
c9ae7b95 3348
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a ``youtube.com/results`` URL instead of a keyword."""
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and optional ``sp`` filter from the URL, then run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # ``search_query`` takes precedence over ``q`` when both are present.
        search_terms = (url_params.get('search_query') or url_params.get('q'))[0]
        # A missing ``sp`` yields '', which _entries treats as "no params".
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
3462ffa8 3374
3375
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed shortcuts.
    Each subclass supplies _FEED_NAME, which determines both the
    extractor name and the ``/feed/<name>`` URL that is delegated.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed URL built from ``_FEED_NAME``."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3395
3396
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut onto the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3409
3410
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches ``:ytrec`` and the bare youtube.com home URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3425
1ed5b5c9 3426
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` family of shortcuts."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3438
1ed5b5c9 3439
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` and ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3448
3449
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video ID and explain the likely cause.

    Such URLs typically result from an unquoted ``&`` on the command line,
    so extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: a watch URL whose whole query is at most one of the
    # listed non-video parameters, or an attribution link with only ``a=``.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example command names this project's executable (yt-dlp), so
        # the hint can be followed as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3497
3498
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is 1-10 characters long and report it as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)