]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[embedthumbnail] Set mtime correctly
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c5e8d7af 28 clean_html,
26fe8ffe 29 dict_get,
c5e8d7af 30 ExtractorError,
b60419c5 31 format_field,
2d30521a 32 float_or_none,
dd27fd17 33 int_or_none,
94278f72 34 mimetype2ext,
6310acf5 35 parse_codecs,
7c80519c 36 parse_duration,
dca3ff4a 37 qualities,
3995d37d 38 remove_start,
cf7e015f 39 smuggle_url,
dbdaaa23 40 str_or_none,
c93d53f5 41 str_to_int,
556dbe7f 42 try_get,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
8bdd16b4 46 update_url_query,
21c340b8 47 url_or_none,
6e6bc8da 48 urlencode_postdata,
8bdd16b4 49 urljoin,
c5e8d7af
PH
50)
51
5f6a1245 52
de7f3446 53class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
54 """Provide base functions for Youtube extractors"""
# Entry page of the Google sign-in flow; _login() downloads it and reuses
# its hidden form inputs for the subsequent requests.
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced by the login code visible in this part of
# the file - confirm whether it is still needed.
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoints _login() POSTs to: account lookup, password challenge and
# TFA code submission ({0} is filled with the "TL" value taken from the
# challenge response).
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of reserved path names.
# NOTE(review): presumably the first URL path segments that must not be
# mistaken for a channel/user name - confirm against the users of this
# constant, none of which are visible here.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for playlist IDs: either a known prefix followed by at least 10
# ID characters, or one of the fixed special list names. It is
# interpolated into YoutubeIE._VALID_URL as %(playlist_id)s.
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 72
25f14e9f
S
73 def _ids_to_results(self, ids):
74 return [
75 self.url_result(vid_id, 'Youtube', video_id=vid_id)
76 for vid_id in ids]
77
b2e8bc1b 78 def _login(self):
83317f69 79 """
80 Attempt to log in to YouTube.
81 True is returned if successful or skipped.
82 False is returned if login failed.
83
84 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
85 """
68217024 86 username, password = self._get_login_info()
b2e8bc1b
JMF
87 # No authentication to be performed
88 if username is None:
70d35d16 89 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
69ea8ca4 90 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
545cc85d 91 # if self._downloader.params.get('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
92 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 93 return True
b2e8bc1b 94
7cc3570e
PH
95 login_page = self._download_webpage(
96 self._LOGIN_URL, None,
69ea8ca4
PH
97 note='Downloading login page',
98 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
99 if login_page is False:
100 return
b2e8bc1b 101
1212e997 102 login_form = self._hidden_inputs(login_page)
c5e8d7af 103
e00eb564
S
104 def req(url, f_req, note, errnote):
105 data = login_form.copy()
106 data.update({
107 'pstMsg': 1,
108 'checkConnection': 'youtube',
109 'checkedDomains': 'youtube',
110 'hl': 'en',
111 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 112 'f.req': json.dumps(f_req),
e00eb564
S
113 'flowName': 'GlifWebSignIn',
114 'flowEntry': 'ServiceLogin',
baf67a60
S
115 # TODO: reverse actual botguard identifier generation algo
116 'bgRequest': '["identifier",""]',
041bc3ad 117 })
e00eb564
S
118 return self._download_json(
119 url, None, note=note, errnote=errnote,
120 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
121 fatal=False,
122 data=urlencode_postdata(data), headers={
123 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
124 'Google-Accounts-XSRF': 1,
125 })
126
3995d37d
S
127 def warn(message):
128 self._downloader.report_warning(message)
129
130 lookup_req = [
131 username,
132 None, [], None, 'US', None, None, 2, False, True,
133 [
134 None, None,
135 [2, 1, None, 1,
136 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
137 None, [], 4],
138 1, [None, None, []], None, None, None, True
139 ],
140 username,
141 ]
142
e00eb564 143 lookup_results = req(
3995d37d 144 self._LOOKUP_URL, lookup_req,
e00eb564
S
145 'Looking up account info', 'Unable to look up account info')
146
147 if lookup_results is False:
148 return False
041bc3ad 149
3995d37d
S
150 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
151 if not user_hash:
152 warn('Unable to extract user hash')
153 return False
154
155 challenge_req = [
156 user_hash,
157 None, 1, None, [1, None, None, None, [password, None, True]],
158 [
159 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
160 1, [None, None, []], None, None, None, True
161 ]]
83317f69 162
3995d37d
S
163 challenge_results = req(
164 self._CHALLENGE_URL, challenge_req,
165 'Logging in', 'Unable to log in')
83317f69 166
3995d37d 167 if challenge_results is False:
e00eb564 168 return
83317f69 169
3995d37d
S
170 login_res = try_get(challenge_results, lambda x: x[0][5], list)
171 if login_res:
172 login_msg = try_get(login_res, lambda x: x[5], compat_str)
173 warn(
174 'Unable to login: %s' % 'Invalid password'
175 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
176 return False
177
178 res = try_get(challenge_results, lambda x: x[0][-1], list)
179 if not res:
180 warn('Unable to extract result entry')
181 return False
182
9a6628aa
S
183 login_challenge = try_get(res, lambda x: x[0][0], list)
184 if login_challenge:
185 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
186 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
187 # SEND_SUCCESS - TFA code has been successfully sent to phone
188 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 189 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
190 if status == 'QUOTA_EXCEEDED':
191 warn('Exceeded the limit of TFA codes, try later')
192 return False
193
194 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
195 if not tl:
196 warn('Unable to extract TL')
197 return False
198
199 tfa_code = self._get_tfa_info('2-step verification code')
200
201 if not tfa_code:
202 warn(
203 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
204 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205 return False
206
207 tfa_code = remove_start(tfa_code, 'G-')
208
209 tfa_req = [
210 user_hash, None, 2, None,
211 [
212 9, None, None, None, None, None, None, None,
213 [None, tfa_code, True, 2]
214 ]]
215
216 tfa_results = req(
217 self._TFA_URL.format(tl), tfa_req,
218 'Submitting TFA code', 'Unable to submit TFA code')
219
220 if tfa_results is False:
221 return False
222
223 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
224 if tfa_res:
225 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
226 warn(
227 'Unable to finish TFA: %s' % 'Invalid TFA code'
228 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
229 return False
230
231 check_cookie_url = try_get(
232 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
233 else:
234 CHALLENGES = {
235 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
236 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
237 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
238 }
239 challenge = CHALLENGES.get(
240 challenge_str,
241 '%s returned error %s.' % (self.IE_NAME, challenge_str))
242 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
243 return False
3995d37d
S
244 else:
245 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
246
247 if not check_cookie_url:
248 warn('Unable to extract CheckCookie URL')
249 return False
e00eb564
S
250
251 check_cookie_results = self._download_webpage(
3995d37d
S
252 check_cookie_url, None, 'Checking cookie', fatal=False)
253
254 if check_cookie_results is False:
255 return False
e00eb564 256
3995d37d
S
257 if 'https://myaccount.google.com/' not in check_cookie_results:
258 warn('Unable to log in')
b2e8bc1b 259 return False
e00eb564 260
b2e8bc1b
JMF
261 return True
262
def _download_webpage_handle(self, *args, **kwargs):
    """Delegate to the parent implementation with a private copy of 'query'.

    The 'query' keyword argument (an empty dict when absent) is copied
    before being forwarded, so the parent call never receives the caller's
    own dict object.
    """
    kwargs['query'] = kwargs.get('query', {}).copy()
    parent = super(YoutubeBaseInfoExtractor, self)
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
268
b2e8bc1b
JMF
269 def _real_initialize(self):
270 if self._downloader is None:
271 return
b2e8bc1b
JMF
272 if not self._login():
273 return
c5e8d7af 274
# Base JSON body for youtubei/v1 requests; _call_api() shallow-copies it
# and merges the per-request query on top.
_DEFAULT_API_DATA = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20210301.08.00',
        }
    },
}

# Captures the JSON object assigned to ytInitialData (plain variable or
# window["ytInitialData"]) up to a closing brace followed by ';'.
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Same shape for ytInitialPlayerResponse.
# NOTE(review): its users are outside the part of the file visible here.
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text that must follow the ';' for the stricter first-choice pattern built
# in _extract_yt_initial_data(); the non-greedy capture above could
# otherwise stop at an earlier '};' inside the JSON.
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 287
a5c56234
M
288 def _generate_sapisidhash_header(self):
289 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
290 if sapisid_cookie is None:
291 return
292 time_now = round(time.time())
293 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
294 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
295
296 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
297 note='Downloading API JSON', errnote='Unable to download API page'):
8bdd16b4 298 data = self._DEFAULT_API_DATA.copy()
299 data.update(query)
a5c56234
M
300 headers = headers or {}
301 headers.update({'content-type': 'application/json'})
302 auth = self._generate_sapisidhash_header()
303 if auth is not None:
304 headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
545cc85d 305 return self._download_json(
a5c56234
M
306 'https://www.youtube.com/youtubei/v1/%s' % ep,
307 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
308 data=json.dumps(data).encode('utf8'), headers=headers,
8bdd16b4 309 query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 310
8bdd16b4 311 def _extract_yt_initial_data(self, video_id, webpage):
312 return self._parse_json(
313 self._search_regex(
29f7c58a 314 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 315 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 316 video_id)
0c148415 317
29f7c58a 318 def _extract_ytcfg(self, video_id, webpage):
319 return self._parse_json(
320 self._search_regex(
321 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
322 default='{}'), video_id, fatal=False)
323
def _extract_video(self, renderer):
    """Turn a video renderer dict into a url_transparent result for YoutubeIE.

    The video ID is used as both 'id' and 'url'. Title, description,
    duration, view count and uploader are looked up in *renderer* with
    try_get(); the view count is the leading run of digits and commas of
    the view-count text once whitespace has been removed.
    """
    vid = renderer.get('videoId')

    title_getters = (
        lambda r: r['title']['runs'][0]['text'],
        lambda r: r['title']['simpleText'])
    video_title = try_get(renderer, title_getters, compat_str)

    snippet = try_get(
        renderer, lambda r: r['descriptionSnippet']['runs'][0]['text'],
        compat_str)

    length_text = try_get(
        renderer, lambda r: r['lengthText']['simpleText'], compat_str)
    length = parse_duration(length_text)

    views_text = try_get(
        renderer, lambda r: r['viewCountText']['simpleText'], compat_str)
    if not views_text:
        views_text = ''
    compact_views = re.sub(r'\s', '', views_text)
    views = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    owner_getters = (
        lambda r: r['ownerText']['runs'][0]['text'],
        lambda r: r['shortBylineText']['runs'][0]['text'])
    owner = try_get(renderer, owner_getters, compat_str)

    info = {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
    }
    info['id'] = vid
    info['url'] = vid
    info['title'] = video_title
    info['description'] = snippet
    info['duration'] = length
    info['view_count'] = views
    info['uploader'] = owner
    return info
355
0c148415 356
360e1ca5 357class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 358 IE_DESC = 'YouTube.com'
bc2ca1bb 359 _INVIDIOUS_SITES = (
360 # invidious-redirect websites
361 r'(?:www\.)?redirect\.invidious\.io',
362 r'(?:(?:www|dev)\.)?invidio\.us',
363 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
364 r'(?:www\.)?invidious\.pussthecat\.org',
365 r'(?:www\.)?invidious\.048596\.xyz',
366 r'(?:www\.)?invidious\.zee\.li',
367 r'(?:www\.)?vid\.puffyan\.us',
368 r'(?:(?:www|au)\.)?ytprivate\.com',
369 r'(?:www\.)?invidious\.namazso\.eu',
370 r'(?:www\.)?invidious\.ethibox\.fr',
371 r'(?:www\.)?inv\.skyn3t\.in',
372 r'(?:www\.)?invidious\.himiko\.cloud',
373 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
374 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
375 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
376 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
377 # youtube-dl invidious instances list
378 r'(?:(?:www|no)\.)?invidiou\.sh',
379 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
380 r'(?:www\.)?invidious\.kabi\.tk',
381 r'(?:www\.)?invidious\.13ad\.de',
382 r'(?:www\.)?invidious\.mastodon\.host',
383 r'(?:www\.)?invidious\.zapashcanon\.fr',
384 r'(?:www\.)?invidious\.kavin\.rocks',
385 r'(?:www\.)?invidious\.tube',
386 r'(?:www\.)?invidiou\.site',
387 r'(?:www\.)?invidious\.site',
388 r'(?:www\.)?invidious\.xyz',
389 r'(?:www\.)?invidious\.nixnet\.xyz',
390 r'(?:www\.)?invidious\.drycat\.fr',
391 r'(?:www\.)?tube\.poal\.co',
392 r'(?:www\.)?tube\.connect\.cafe',
393 r'(?:www\.)?vid\.wxzm\.sx',
394 r'(?:www\.)?vid\.mint\.lgbt',
395 r'(?:www\.)?yewtu\.be',
396 r'(?:www\.)?yt\.elukerio\.org',
397 r'(?:www\.)?yt\.lelux\.fi',
398 r'(?:www\.)?invidious\.ggc-project\.de',
399 r'(?:www\.)?yt\.maisputain\.ovh',
400 r'(?:www\.)?invidious\.toot\.koeln',
401 r'(?:www\.)?invidious\.fdn\.fr',
402 r'(?:www\.)?watch\.nettohikari\.com',
403 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
404 r'(?:www\.)?qklhadlycap4cnod\.onion',
405 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
406 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
407 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
408 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
409 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
410 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
411 )
cb7dfeea 412 _VALID_URL = r"""(?x)^
c5e8d7af 413 (
edb53e2d 414 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 415 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
416 (?:www\.)?deturl\.com/www\.youtube\.com|
417 (?:www\.)?pwnyoutube\.com|
418 (?:www\.)?hooktube\.com|
419 (?:www\.)?yourepeat\.com|
420 tube\.majestyc\.net|
421 %(invidious)s|
422 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
423 (?:.*?\#/)? # handle anchor (#/) redirect urls
424 (?: # the various things that can precede the ID:
ac7553d0 425 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 426 |(?: # or the v= param in all its forms
f7000f3a 427 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 428 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 429 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
430 v=
431 )
f4b05232 432 ))
cbaed4bb
S
433 |(?:
434 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
435 vid\.plus| # or vid.plus/xxxx
436 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 437 %(invidious)s
cbaed4bb 438 )/
edb53e2d 439 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 440 )
c5e8d7af 441 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 442 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
443 (?!.*?\blist=
444 (?:
445 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
446 WL # WL are handled by the watch later IE
447 )
448 )
c5e8d7af 449 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 450 $""" % {
451 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
452 'invidious': '|'.join(_INVIDIOUS_SITES),
453 }
e40c758c 454 _PLAYER_INFO_RE = (
cc2db878 455 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
456 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 457 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 458 )
2c62dc26 459 _formats = {
c2d3cb4c 460 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
462 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
463 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
464 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
465 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
466 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 468 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 469 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
470 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
472 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
474 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 475 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 476 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
477 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 478
479
480 # 3D videos
c2d3cb4c 481 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
483 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
484 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 485 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
486 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
487 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 488
96fb5605 489 # Apple HTTP Live Streaming
11f12195 490 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 491 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
492 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
494 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
495 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 496 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
497 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
498
499 # DASH mp4 video
d23028a8
S
500 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
504 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 505 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
506 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
509 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
511 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 512
f6f1fc92 513 # Dash mp4 audio
d23028a8
S
514 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
515 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
516 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
517 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
519 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
520 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
521
522 # Dash webm
d23028a8
S
523 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
529 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
530 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
537 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 538 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
539 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
540 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
543 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
544 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
545
546 # Dash webm audio
d23028a8
S
547 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
548 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 549
0857baad 550 # Dash webm audio with opus inside
d23028a8
S
551 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
552 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
553 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 554
ce6b9a2d
PH
555 # RTMP (unnamed)
556 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
557
558 # av01 video only formats sometimes served with "unknown" codecs
559 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
562 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 563 }
29f7c58a 564 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 565
fd5c4aab
S
566 _GEO_BYPASS = False
567
78caa52a 568 IE_NAME = 'youtube'
2eb88d95
PH
569 _TESTS = [
570 {
2d3d2997 571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
3867038a 575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
579 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
580 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 581 'upload_date': '20121002',
3867038a 582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 583 'categories': ['Science & Technology'],
3867038a 584 'tags': ['youtube-dl'],
556dbe7f 585 'duration': 10,
dbdaaa23 586 'view_count': int,
3e7c1224
PH
587 'like_count': int,
588 'dislike_count': int,
7c80519c 589 'start_time': 1,
297a564b 590 'end_time': 9,
2eb88d95 591 }
0e853ca4 592 },
fccd3771 593 {
4bc3a23e
PH
594 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
595 'note': 'Embed-only video (#1746)',
596 'info_dict': {
597 'id': 'yZIXLfi8CZQ',
598 'ext': 'mp4',
599 'upload_date': '20120608',
600 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
601 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
602 'uploader': 'SET India',
94bfcd23 603 'uploader_id': 'setindia',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 605 'age_limit': 18,
545cc85d 606 },
607 'skip': 'Private video',
fccd3771 608 },
11b56058 609 {
8bdd16b4 610 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
611 'note': 'Use the first video ID in the URL',
612 'info_dict': {
613 'id': 'BaW_jenozKc',
614 'ext': 'mp4',
3867038a 615 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
616 'uploader': 'Philipp Hagemeister',
617 'uploader_id': 'phihag',
ec85ded8 618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 619 'upload_date': '20121002',
3867038a 620 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 621 'categories': ['Science & Technology'],
3867038a 622 'tags': ['youtube-dl'],
556dbe7f 623 'duration': 10,
dbdaaa23 624 'view_count': int,
11b56058
PM
625 'like_count': int,
626 'dislike_count': int,
34a7de29
S
627 },
628 'params': {
629 'skip_download': True,
630 },
11b56058 631 },
dd27fd17 632 {
2d3d2997 633 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
634 'note': '256k DASH audio (format 141) via DASH manifest',
635 'info_dict': {
636 'id': 'a9LDPn-MO4I',
637 'ext': 'm4a',
638 'upload_date': '20121002',
639 'uploader_id': '8KVIDEO',
ec85ded8 640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
641 'description': '',
642 'uploader': '8KVIDEO',
643 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 644 },
4bc3a23e
PH
645 'params': {
646 'youtube_include_dash_manifest': True,
647 'format': '141',
4919603f 648 },
de3c7fe0 649 'skip': 'format 141 not served anymore',
dd27fd17 650 },
8bdd16b4 651 # DASH manifest with encrypted signature
652 {
653 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
654 'info_dict': {
655 'id': 'IB3lcPjvWLA',
656 'ext': 'm4a',
657 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
658 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
659 'duration': 244,
660 'uploader': 'AfrojackVEVO',
661 'uploader_id': 'AfrojackVEVO',
662 'upload_date': '20131011',
cc2db878 663 'abr': 129.495,
8bdd16b4 664 },
665 'params': {
666 'youtube_include_dash_manifest': True,
667 'format': '141/bestaudio[ext=m4a]',
668 },
669 },
aa79ac0c
PH
670 # Controversy video
671 {
672 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
673 'info_dict': {
674 'id': 'T4XJQO3qol8',
675 'ext': 'mp4',
556dbe7f 676 'duration': 219,
aa79ac0c 677 'upload_date': '20100909',
4fe54c12 678 'uploader': 'Amazing Atheist',
aa79ac0c 679 'uploader_id': 'TheAmazingAtheist',
ec85ded8 680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 681 'title': 'Burning Everyone\'s Koran',
545cc85d 682 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 683 }
c522adb1 684 },
dd2d55f1 685 # Normal age-gate video (embed allowed)
c522adb1 686 {
2d3d2997 687 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
688 'info_dict': {
689 'id': 'HtVdAasjOgU',
690 'ext': 'mp4',
691 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 692 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 693 'duration': 142,
c522adb1
JMF
694 'uploader': 'The Witcher',
695 'uploader_id': 'WitcherGame',
ec85ded8 696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 697 'upload_date': '20140605',
34952f09 698 'age_limit': 18,
c522adb1
JMF
699 },
700 },
8bdd16b4 701 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
702 # YouTube Red ad is not captured for creator
703 {
704 'url': '__2ABJjxzNo',
705 'info_dict': {
706 'id': '__2ABJjxzNo',
707 'ext': 'mp4',
708 'duration': 266,
709 'upload_date': '20100430',
710 'uploader_id': 'deadmau5',
711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 712 'creator': 'deadmau5',
713 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 714 'uploader': 'deadmau5',
715 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 716 'alt_title': 'Some Chords',
8bdd16b4 717 },
718 'expected_warnings': [
719 'DASH manifest missing',
720 ]
721 },
067aa17e 722 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
723 {
724 'url': 'lqQg6PlCWgI',
725 'info_dict': {
726 'id': 'lqQg6PlCWgI',
727 'ext': 'mp4',
556dbe7f 728 'duration': 6085,
90227264 729 'upload_date': '20150827',
cbe2bd91 730 'uploader_id': 'olympic',
ec85ded8 731 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 732 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 733 'uploader': 'Olympic',
cbe2bd91
PH
734 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
735 },
736 'params': {
737 'skip_download': 'requires avconv',
e52a40ab 738 }
cbe2bd91 739 },
6271f1ca
PH
740 # Non-square pixels
741 {
742 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
743 'info_dict': {
744 'id': '_b-2C3KPAM0',
745 'ext': 'mp4',
746 'stretched_ratio': 16 / 9.,
556dbe7f 747 'duration': 85,
6271f1ca
PH
748 'upload_date': '20110310',
749 'uploader_id': 'AllenMeow',
ec85ded8 750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 751 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 752 'uploader': '孫ᄋᄅ',
6271f1ca
PH
753 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
754 },
06b491eb
S
755 },
756 # url_encoded_fmt_stream_map is empty string
757 {
758 'url': 'qEJwOuvDf7I',
759 'info_dict': {
760 'id': 'qEJwOuvDf7I',
f57b7835 761 'ext': 'webm',
06b491eb
S
762 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
763 'description': '',
764 'upload_date': '20150404',
765 'uploader_id': 'spbelect',
766 'uploader': 'Наблюдатели Петербурга',
767 },
768 'params': {
769 'skip_download': 'requires avconv',
e323cf3f
S
770 },
771 'skip': 'This live event has ended.',
06b491eb 772 },
067aa17e 773 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
774 {
775 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
776 'info_dict': {
777 'id': 'FIl7x6_3R5Y',
eb6793ba 778 'ext': 'webm',
da77d856
S
779 'title': 'md5:7b81415841e02ecd4313668cde88737a',
780 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 781 'duration': 220,
da77d856
S
782 'upload_date': '20150625',
783 'uploader_id': 'dorappi2000',
ec85ded8 784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 785 'uploader': 'dorappi2000',
eb6793ba 786 'formats': 'mincount:31',
da77d856 787 },
eb6793ba 788 'skip': 'not actual anymore',
2ee8f5d8 789 },
8a1a26ce
YCH
790 # DASH manifest with segment_list
791 {
792 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
793 'md5': '8ce563a1d667b599d21064e982ab9e31',
794 'info_dict': {
795 'id': 'CsmdDsKjzN8',
796 'ext': 'mp4',
17ee98e1 797 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
798 'uploader': 'Airtek',
799 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
800 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
801 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
802 },
803 'params': {
804 'youtube_include_dash_manifest': True,
805 'format': '135', # bestvideo
be49068d
S
806 },
807 'skip': 'This live event has ended.',
2ee8f5d8 808 },
cf7e015f
S
809 {
810 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 811 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 812 'info_dict': {
545cc85d 813 'id': 'jvGDaLqkpTg',
814 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
815 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
816 },
817 'playlist': [{
818 'info_dict': {
545cc85d 819 'id': 'jvGDaLqkpTg',
cf7e015f 820 'ext': 'mp4',
545cc85d 821 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
822 'description': 'md5:e03b909557865076822aa169218d6a5d',
823 'duration': 10643,
824 'upload_date': '20161111',
825 'uploader': 'Team PGP',
826 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
828 },
829 }, {
830 'info_dict': {
545cc85d 831 'id': '3AKt1R1aDnw',
cf7e015f 832 'ext': 'mp4',
545cc85d 833 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
834 'description': 'md5:e03b909557865076822aa169218d6a5d',
835 'duration': 10991,
836 'upload_date': '20161111',
837 'uploader': 'Team PGP',
838 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
839 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
840 },
841 }, {
842 'info_dict': {
545cc85d 843 'id': 'RtAMM00gpVc',
cf7e015f 844 'ext': 'mp4',
545cc85d 845 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
846 'description': 'md5:e03b909557865076822aa169218d6a5d',
847 'duration': 10995,
848 'upload_date': '20161111',
849 'uploader': 'Team PGP',
850 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
852 },
853 }, {
854 'info_dict': {
545cc85d 855 'id': '6N2fdlP3C5U',
cf7e015f 856 'ext': 'mp4',
545cc85d 857 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
858 'description': 'md5:e03b909557865076822aa169218d6a5d',
859 'duration': 10990,
860 'upload_date': '20161111',
861 'uploader': 'Team PGP',
862 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
864 },
865 }],
866 'params': {
867 'skip_download': True,
868 },
cbaed4bb 869 },
f9f49d87 870 {
067aa17e 871 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
872 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
873 'info_dict': {
874 'id': 'gVfLd0zydlo',
875 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
876 },
877 'playlist_count': 2,
be49068d 878 'skip': 'Not multifeed anymore',
f9f49d87 879 },
cbaed4bb 880 {
2d3d2997 881 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 882 'only_matching': True,
0e49d9a6 883 },
6d4fc66b 884 {
2d3d2997 885 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
886 'only_matching': True,
887 },
0e49d9a6 888 {
067aa17e 889 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 890 # Also tests cut-off URL expansion in video description (see
067aa17e
S
891 # https://github.com/ytdl-org/youtube-dl/issues/1892,
892 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
893 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
894 'info_dict': {
895 'id': 'lsguqyKfVQg',
896 'ext': 'mp4',
897 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 898 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 899 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 900 'duration': 133,
0e49d9a6
LL
901 'upload_date': '20151119',
902 'uploader_id': 'IronSoulElf',
ec85ded8 903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 904 'uploader': 'IronSoulElf',
eb6793ba
S
905 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
906 'track': 'Dark Walk - Position Music',
907 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 908 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
909 },
910 'params': {
911 'skip_download': True,
912 },
913 },
61f92af1 914 {
067aa17e 915 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
916 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
917 'only_matching': True,
918 },
313dfc45
LL
919 {
920 # Video with yt:stretch=17:0
921 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
922 'info_dict': {
923 'id': 'Q39EVAstoRM',
924 'ext': 'mp4',
925 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
926 'description': 'md5:ee18a25c350637c8faff806845bddee9',
927 'upload_date': '20151107',
928 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
929 'uploader': 'CH GAMER DROID',
930 },
931 'params': {
932 'skip_download': True,
933 },
be49068d 934 'skip': 'This video does not exist.',
313dfc45 935 },
7caf9830
S
936 {
937 # Video licensed under Creative Commons
938 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
939 'info_dict': {
940 'id': 'M4gD1WSo5mA',
941 'ext': 'mp4',
942 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
943 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 944 'duration': 721,
7caf9830
S
945 'upload_date': '20150127',
946 'uploader_id': 'BerkmanCenter',
ec85ded8 947 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 948 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
949 'license': 'Creative Commons Attribution license (reuse allowed)',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 },
fd050249
S
955 {
956 # Channel-like uploader_url
957 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
958 'info_dict': {
959 'id': 'eQcmzGIKrzg',
960 'ext': 'mp4',
961 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 962 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 963 'duration': 4060,
fd050249 964 'upload_date': '20151119',
eb6793ba 965 'uploader': 'Bernie Sanders',
fd050249 966 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
968 'license': 'Creative Commons Attribution license (reuse allowed)',
969 },
970 'params': {
971 'skip_download': True,
972 },
973 },
040ac686
S
974 {
975 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
976 'only_matching': True,
7f29cf54
S
977 },
978 {
067aa17e 979 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
980 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
981 'only_matching': True,
6496ccb4
S
982 },
983 {
984 # Rental video preview
985 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
986 'info_dict': {
987 'id': 'uGpuVWrhIzE',
988 'ext': 'mp4',
989 'title': 'Piku - Trailer',
990 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
991 'upload_date': '20150811',
992 'uploader': 'FlixMatrix',
993 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
995 'license': 'Standard YouTube License',
996 },
997 'params': {
998 'skip_download': True,
999 },
eb6793ba 1000 'skip': 'This video is not available.',
022a5d66 1001 },
12afdc2a
S
1002 {
1003 # YouTube Red video with episode data
1004 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1005 'info_dict': {
1006 'id': 'iqKdEhx-dD4',
1007 'ext': 'mp4',
1008 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1009 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1010 'duration': 2085,
12afdc2a
S
1011 'upload_date': '20170118',
1012 'uploader': 'Vsauce',
1013 'uploader_id': 'Vsauce',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1015 'series': 'Mind Field',
1016 'season_number': 1,
1017 'episode_number': 1,
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 'expected_warnings': [
1023 'Skipping DASH manifest',
1024 ],
1025 },
c7121fa7
S
1026 {
1027 # The following content has been identified by the YouTube community
1028 # as inappropriate or offensive to some audiences.
1029 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1030 'info_dict': {
1031 'id': '6SJNVb0GnPI',
1032 'ext': 'mp4',
1033 'title': 'Race Differences in Intelligence',
1034 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1035 'duration': 965,
1036 'upload_date': '20140124',
1037 'uploader': 'New Century Foundation',
1038 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1040 },
1041 'params': {
1042 'skip_download': True,
1043 },
545cc85d 1044 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1045 },
022a5d66
S
1046 {
1047 # itag 212
1048 'url': '1t24XAntNCY',
1049 'only_matching': True,
fd5c4aab
S
1050 },
1051 {
1052 # geo restricted to JP
1053 'url': 'sJL6WA-aGkQ',
1054 'only_matching': True,
1055 },
cd5a74a2
S
1056 {
1057 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1058 'only_matching': True,
1059 },
bc2ca1bb 1060 {
1061 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1062 'only_matching': True,
1063 },
1064 {
1065 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1066 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1067 'only_matching': True,
1068 },
825cd268
RA
1069 {
1070 # DRM protected
1071 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1072 'only_matching': True,
4fe54c12
S
1073 },
1074 {
1075 # Video with unsupported adaptive stream type formats
1076 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1077 'info_dict': {
1078 'id': 'Z4Vy8R84T1U',
1079 'ext': 'mp4',
1080 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1081 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1082 'duration': 433,
1083 'upload_date': '20130923',
1084 'uploader': 'Amelia Putri Harwita',
1085 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1087 'formats': 'maxcount:10',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 'youtube_include_dash_manifest': False,
1092 },
5429d6a9 1093 'skip': 'not actual anymore',
5caabd3c 1094 },
1095 {
822b9d9c 1096 # Youtube Music Auto-generated description
5caabd3c 1097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1098 'info_dict': {
1099 'id': 'MgNrAu2pzNs',
1100 'ext': 'mp4',
1101 'title': 'Voyeur Girl',
1102 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1103 'upload_date': '20190312',
5429d6a9
S
1104 'uploader': 'Stephen - Topic',
1105 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1106 'artist': 'Stephen',
1107 'track': 'Voyeur Girl',
1108 'album': 'it\'s too much love to know my dear',
1109 'release_date': '20190313',
1110 'release_year': 2019,
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
1115 },
66b48727
RA
1116 {
1117 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1118 'only_matching': True,
1119 },
011e75e6
S
1120 {
1121 # invalid -> valid video id redirection
1122 'url': 'DJztXj2GPfl',
1123 'info_dict': {
1124 'id': 'DJztXj2GPfk',
1125 'ext': 'mp4',
1126 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1127 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1128 'upload_date': '20090125',
1129 'uploader': 'Prochorowka',
1130 'uploader_id': 'Prochorowka',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1132 'artist': 'Panjabi MC',
1133 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1134 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1135 },
1136 'params': {
1137 'skip_download': True,
1138 },
545cc85d 1139 'skip': 'Video unavailable',
ea74e00b
DP
1140 },
1141 {
1142 # empty description results in an empty string
1143 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1144 'info_dict': {
1145 'id': 'x41yOUIvK2k',
1146 'ext': 'mp4',
1147 'title': 'IMG 3456',
1148 'description': '',
1149 'upload_date': '20170613',
1150 'uploader_id': 'ElevageOrVert',
1151 'uploader': 'ElevageOrVert',
1152 },
1153 'params': {
1154 'skip_download': True,
1155 },
1156 },
a0566bbf 1157 {
29f7c58a 1158 # with '};' inside yt initial data (see [1])
1159 # see [2] for an example with '};' inside ytInitialPlayerResponse
1160 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1161 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1162 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1163 'info_dict': {
1164 'id': 'CHqg6qOn4no',
1165 'ext': 'mp4',
1166 'title': 'Part 77 Sort a list of simple types in c#',
1167 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1168 'upload_date': '20130831',
1169 'uploader_id': 'kudvenkat',
1170 'uploader': 'kudvenkat',
1171 },
1172 'params': {
1173 'skip_download': True,
1174 },
1175 },
29f7c58a 1176 {
1177 # another example of '};' in ytInitialData
1178 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1179 'only_matching': True,
1180 },
1181 {
1182 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1183 'only_matching': True,
1184 },
545cc85d 1185 {
cc2db878 1186 # https://github.com/ytdl-org/youtube-dl/pull/28094
1187 'url': 'OtqTfy26tG0',
1188 'info_dict': {
1189 'id': 'OtqTfy26tG0',
1190 'ext': 'mp4',
1191 'title': 'Burn Out',
1192 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1193 'upload_date': '20141120',
1194 'uploader': 'The Cinematic Orchestra - Topic',
1195 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1196 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1197 'artist': 'The Cinematic Orchestra',
1198 'track': 'Burn Out',
1199 'album': 'Every Day',
1200 'release_data': None,
1201 'release_year': None,
1202 },
1203 'params': {
1204 'skip_download': True,
1205 },
545cc85d 1206 },
bc2ca1bb 1207 {
1208 # controversial video, only works with bpctr when authenticated with cookies
1209 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1210 'only_matching': True,
1211 },
2eb88d95
PH
1212 ]
1213
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Downloaded player code, keyed by player id
    self._code_cache = dict()
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = dict()
e0df6211 1218
60064c53
PH
1219 def _signature_cache_id(self, example_sig):
1220 """ Return a string representation of a signature """
78caa52a 1221 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1222
e40c758c
S
1223 @classmethod
1224 def _extract_player_info(cls, player_url):
1225 for player_re in cls._PLAYER_INFO_RE:
1226 id_m = re.search(player_re, player_url)
1227 if id_m:
1228 break
1229 else:
c081b35c 1230 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1231 return id_m.group('id')
e40c758c
S
1232
1233 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1234 player_id = self._extract_player_info(player_url)
e0df6211 1235
c4417ddb 1236 # Read from filesystem cache
545cc85d 1237 func_id = 'js_%s_%s' % (
1238 player_id, self._signature_cache_id(example_sig))
c4417ddb 1239 assert os.path.basename(func_id) == func_id
a0e07d31 1240
69ea8ca4 1241 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1242 if cache_spec is not None:
78caa52a 1243 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1244
545cc85d 1245 if player_id not in self._code_cache:
1246 self._code_cache[player_id] = self._download_webpage(
e0df6211 1247 player_url, video_id,
545cc85d 1248 note='Downloading player ' + player_id,
69ea8ca4 1249 errnote='Download of %s failed' % player_url)
545cc85d 1250 code = self._code_cache[player_id]
1251 res = self._parse_sig_js(code)
e0df6211 1252
785521bf
PH
1253 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1254 cache_res = res(test_string)
1255 cache_spec = [ord(c) for c in cache_res]
83799698 1256
69ea8ca4 1257 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1258 return res
1259
60064c53 1260 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1261 def gen_sig_code(idxs):
1262 def _genslice(start, end, step):
78caa52a 1263 starts = '' if start == 0 else str(start)
8bcc8756 1264 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1265 steps = '' if step == 1 else (':%d' % step)
78caa52a 1266 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1267
1268 step = None
7af808a5
PH
1269 # Quelch pyflakes warnings - start will be set when step is set
1270 start = '(Never used)'
edf3e38e
PH
1271 for i, prev in zip(idxs[1:], idxs[:-1]):
1272 if step is not None:
1273 if i - prev == step:
1274 continue
1275 yield _genslice(start, prev, step)
1276 step = None
1277 continue
1278 if i - prev in [-1, 1]:
1279 step = i - prev
1280 start = prev
1281 continue
1282 else:
78caa52a 1283 yield 's[%d]' % prev
edf3e38e 1284 if step is None:
78caa52a 1285 yield 's[%d]' % i
edf3e38e
PH
1286 else:
1287 yield _genslice(start, i, step)
1288
78caa52a 1289 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1290 cache_res = func(test_string)
edf3e38e 1291 cache_spec = [ord(c) for c in cache_res]
78caa52a 1292 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1293 signature_id_tuple = '(%s)' % (
1294 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1295 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1296 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1297 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1298
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in *jscode*.

    The name of the function is located with the patterns below (the
    'sig' group of the first one that matches), the function is extracted
    with JSInterpreter, and the returned callable invokes it with the
    signature string as its only argument.
    """
    signature_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): the next pattern is identical to the previous one
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        signature_name_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_initial_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_initial_function
1322
545cc85d 1323 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1324 """Turn the encrypted s field into a working signature"""
6b37f0be 1325
c8bf86d5 1326 if player_url is None:
69ea8ca4 1327 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1328
69ea8ca4 1329 if player_url.startswith('//'):
78caa52a 1330 player_url = 'https:' + player_url
3c90cc8b
S
1331 elif not re.match(r'https?://', player_url):
1332 player_url = compat_urlparse.urljoin(
1333 'https://www.youtube.com', player_url)
c8bf86d5 1334 try:
62af3a0e 1335 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1336 if player_id not in self._player_cache:
1337 func = self._extract_signature_function(
60064c53 1338 video_id, player_url, s
c8bf86d5
PH
1339 )
1340 self._player_cache[player_id] = func
1341 func = self._player_cache[player_id]
1342 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1343 self._print_sig_code(func, s)
c8bf86d5
PH
1344 return func(s)
1345 except Exception as e:
1346 tb = traceback.format_exc()
1347 raise ExtractorError(
78caa52a 1348 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1349
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL to mark the video as watched.

    The URL is read from the 'playbackTracking' section of
    *player_response*; nothing is done when it is missing or not a valid
    URL.  The 'ver' and 'cpn' query parameters are set before the request,
    which is made non-fatally.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1374
66c9fa36
S
1375 @staticmethod
1376 def _extract_urls(webpage):
1377 # Embedded YouTube player
1378 entries = [
1379 unescapeHTML(mobj.group('url'))
1380 for mobj in re.finditer(r'''(?x)
1381 (?:
1382 <iframe[^>]+?src=|
1383 data-video-url=|
1384 <embed[^>]+?src=|
1385 embedSWF\(?:\s*|
1386 <object[^>]+data=|
1387 new\s+SWFObject\(
1388 )
1389 (["\'])
1390 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1391 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1392 \1''', webpage)]
1393
1394 # lazyYT YouTube embed
1395 entries.extend(list(map(
1396 unescapeHTML,
1397 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1398
1399 # Wordpress "YouTube Video Importer" plugin
1400 matches = re.findall(r'''(?x)<div[^>]+
1401 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1402 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1403 entries.extend(m[-1] for m in matches)
1404
1405 return entries
1406
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls in *webpage*, or None."""
    for url in YoutubeIE._extract_urls(webpage):
        return url
    return None
1411
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. the second group of cls._VALID_URL in *url*.

    The pattern is applied in verbose mode from the start of *url*.

    Raises ExtractorError when *url* does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1419
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the player bar data in *data*.

    Each chapter gets 'start_time' and 'end_time' (both converted from
    milliseconds by dividing by 1000) and a 'title'.  A chapter ends where
    the next one starts; the last chapter ends at *duration*.  Chapters
    without a usable start or end time are left out.

    Returns None when *data* holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = chapter_time(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1459
545cc85d 1460 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1461 return self._parse_json(self._search_regex(
1462 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1463 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1464
c5e8d7af 1465 def _real_extract(self, url):
cf7e015f 1466 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1467 video_id = self._match_id(url)
1468 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1469 webpage_url = base_url + 'watch?v=' + video_id
1470 webpage = self._download_webpage(
1471 webpage_url + '&has_verified=1&bpctr=9999999999',
1472 video_id, fatal=False)
545cc85d 1473
1474 player_response = None
1475 if webpage:
1476 player_response = self._extract_yt_initial_variable(
1477 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1478 video_id, 'initial player response')
1479 if not player_response:
1480 player_response = self._call_api(
1481 'player', {'videoId': video_id}, video_id)
1482
1483 playability_status = player_response.get('playabilityStatus') or {}
1484 if playability_status.get('reason') == 'Sign in to confirm your age':
1485 pr = self._parse_json(try_get(compat_parse_qs(
1486 self._download_webpage(
1487 base_url + 'get_video_info', video_id,
1488 'Refetching age-gated info webpage',
1489 'unable to download video info webpage', query={
1490 'video_id': video_id,
7c60c33e 1491 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1492 }, fatal=False)),
1493 lambda x: x['player_response'][0],
1494 compat_str) or '{}', video_id)
1495 if pr:
1496 player_response = pr
1497
1498 trailer_video_id = try_get(
1499 playability_status,
1500 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1501 compat_str)
1502 if trailer_video_id:
1503 return self.url_result(
1504 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1505
545cc85d 1506 def get_text(x):
1507 if not x:
c2d125d9 1508 return
545cc85d 1509 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1510
545cc85d 1511 search_meta = (
1512 lambda x: self._html_search_meta(x, webpage, default=None)) \
1513 if webpage else lambda x: None
dbdaaa23 1514
545cc85d 1515 video_details = player_response.get('videoDetails') or {}
37357d21 1516 microformat = try_get(
545cc85d 1517 player_response,
1518 lambda x: x['microformat']['playerMicroformatRenderer'],
1519 dict) or {}
1520 video_title = video_details.get('title') \
1521 or get_text(microformat.get('title')) \
1522 or search_meta(['og:title', 'twitter:title', 'title'])
1523 video_description = video_details.get('shortDescription')
cf7e015f 1524
8fe10494 1525 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1526 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1527 multifeed_metadata_list = try_get(
1528 player_response,
1529 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1530 compat_str)
8fe10494
S
1531 if multifeed_metadata_list:
1532 entries = []
1533 feed_ids = []
1534 for feed in multifeed_metadata_list.split(','):
1535 # Unquote should take place before split on comma (,) since textual
1536 # fields may contain comma as well (see
067aa17e 1537 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1538 feed_data = compat_parse_qs(
1539 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1540
def feed_entry(name):
    """Look up the first value of *name* in the enclosing ``feed_data``.

    The lookup goes through ``try_get`` with ``compat_str`` as the expected
    type, so the result is whatever ``try_get`` yields for a missing or
    non-string entry (presumably ``None`` -- the caller treats a falsy
    result as "absent").
    """
    def first_value(params):
        return params[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
1544
1545 feed_id = feed_entry('id')
1546 if not feed_id:
1547 continue
1548 feed_title = feed_entry('title')
1549 title = video_title
1550 if feed_title:
1551 title += ' (%s)' % feed_title
8fe10494
S
1552 entries.append({
1553 '_type': 'url_transparent',
1554 'ie_key': 'Youtube',
1555 'url': smuggle_url(
545cc85d 1556 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1557 {'force_singlefeed': True}),
6b09401b 1558 'title': title,
8fe10494 1559 })
6b09401b 1560 feed_ids.append(feed_id)
8fe10494
S
1561 self.to_screen(
1562 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1563 % (', '.join(feed_ids), video_id))
545cc85d 1564 return self.playlist_result(
1565 entries, video_id, video_title, video_description)
8fe10494
S
1566 else:
1567 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1568
545cc85d 1569 formats = []
1570 itags = []
cc2db878 1571 itag_qualities = {}
545cc85d 1572 player_url = None
dca3ff4a 1573 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1574 streaming_data = player_response.get('streamingData') or {}
1575 streaming_formats = streaming_data.get('formats') or []
1576 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1577 for fmt in streaming_formats:
1578 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1579 continue
321bf820 1580
cc2db878 1581 itag = str_or_none(fmt.get('itag'))
1582 quality = fmt.get('quality')
1583 if itag and quality:
1584 itag_qualities[itag] = quality
1585 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1586 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1587 # number of fragments that would subsequently be requested with (`&sq=N`)
1588 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1589 continue
1590
545cc85d 1591 fmt_url = fmt.get('url')
1592 if not fmt_url:
1593 sc = compat_parse_qs(fmt.get('signatureCipher'))
1594 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1595 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1596 if not (sc and fmt_url and encrypted_sig):
1597 continue
1598 if not player_url:
1599 if not webpage:
1600 continue
1601 player_url = self._search_regex(
1602 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1603 webpage, 'player URL', fatal=False)
1604 if not player_url:
201e9eaa 1605 continue
545cc85d 1606 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1607 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1608 fmt_url += '&' + sp + '=' + signature
1609
545cc85d 1610 if itag:
1611 itags.append(itag)
cc2db878 1612 tbr = float_or_none(
1613 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1614 dct = {
1615 'asr': int_or_none(fmt.get('audioSampleRate')),
1616 'filesize': int_or_none(fmt.get('contentLength')),
1617 'format_id': itag,
1618 'format_note': fmt.get('qualityLabel') or quality,
1619 'fps': int_or_none(fmt.get('fps')),
1620 'height': int_or_none(fmt.get('height')),
dca3ff4a 1621 'quality': q(quality),
cc2db878 1622 'tbr': tbr,
545cc85d 1623 'url': fmt_url,
1624 'width': fmt.get('width'),
1625 }
1626 mimetype = fmt.get('mimeType')
1627 if mimetype:
1628 mobj = re.match(
1629 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1630 if mobj:
1631 dct['ext'] = mimetype2ext(mobj.group(1))
1632 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1633 no_audio = dct.get('acodec') == 'none'
1634 no_video = dct.get('vcodec') == 'none'
1635 if no_audio:
1636 dct['vbr'] = tbr
1637 if no_video:
1638 dct['abr'] = tbr
1639 if no_audio or no_video:
545cc85d 1640 dct['downloader_options'] = {
1641 # Youtube throttles chunks >~10M
1642 'http_chunk_size': 10485760,
bf1317d2 1643 }
7c60c33e 1644 if dct.get('ext'):
1645 dct['container'] = dct['ext'] + '_dash'
545cc85d 1646 formats.append(dct)
1647
1648 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1649 if hls_manifest_url:
1650 for f in self._extract_m3u8_formats(
1651 hls_manifest_url, video_id, 'mp4', fatal=False):
1652 itag = self._search_regex(
1653 r'/itag/(\d+)', f['url'], 'itag', default=None)
1654 if itag:
1655 f['format_id'] = itag
1656 formats.append(f)
1657
1658 if self._downloader.params.get('youtube_include_dash_manifest'):
1659 dash_manifest_url = streaming_data.get('dashManifestUrl')
1660 if dash_manifest_url:
545cc85d 1661 for f in self._extract_mpd_formats(
1662 dash_manifest_url, video_id, fatal=False):
cc2db878 1663 itag = f['format_id']
1664 if itag in itags:
1665 continue
dca3ff4a 1666 if itag in itag_qualities:
1667 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1668 # but kept to maintain feature parity (and code similarity) with youtube-dl
1669 # Remove if this causes any issues with sorting in future
1670 f['quality'] = q(itag_qualities[itag])
545cc85d 1671 filesize = int_or_none(self._search_regex(
1672 r'/clen/(\d+)', f.get('fragment_base_url')
1673 or f['url'], 'file size', default=None))
1674 if filesize:
1675 f['filesize'] = filesize
cc2db878 1676 formats.append(f)
bf1317d2 1677
545cc85d 1678 if not formats:
63ad4d43 1679 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1680 raise ExtractorError(
1681 'This video is DRM protected.', expected=True)
1682 pemr = try_get(
1683 playability_status,
1684 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1685 dict) or {}
1686 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1687 subreason = pemr.get('subreason')
1688 if subreason:
1689 subreason = clean_html(get_text(subreason))
1690 if subreason == 'The uploader has not made this video available in your country.':
1691 countries = microformat.get('availableCountries')
1692 if not countries:
1693 regions_allowed = search_meta('regionsAllowed')
1694 countries = regions_allowed.split(',') if regions_allowed else None
1695 self.raise_geo_restricted(
1696 subreason, countries)
1697 reason += '\n' + subreason
1698 if reason:
1699 raise ExtractorError(reason, expected=True)
bf1317d2 1700
545cc85d 1701 self._sort_formats(formats)
bf1317d2 1702
545cc85d 1703 keywords = video_details.get('keywords') or []
1704 if not keywords and webpage:
1705 keywords = [
1706 unescapeHTML(m.group('content'))
1707 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1708 for keyword in keywords:
1709 if keyword.startswith('yt:stretch='):
1710 w, h = keyword.split('=')[1].split(':')
1711 w, h = int(w), int(h)
1712 if w > 0 and h > 0:
1713 ratio = w / h
1714 for f in formats:
1715 if f.get('vcodec') != 'none':
1716 f['stretched_ratio'] = ratio
6449cd80 1717
545cc85d 1718 thumbnails = []
1719 for container in (video_details, microformat):
1720 for thumbnail in (try_get(
1721 container,
1722 lambda x: x['thumbnail']['thumbnails'], list) or []):
1723 thumbnail_url = thumbnail.get('url')
1724 if not thumbnail_url:
bf1317d2 1725 continue
545cc85d 1726 thumbnails.append({
1727 'height': int_or_none(thumbnail.get('height')),
1728 'url': thumbnail_url,
1729 'width': int_or_none(thumbnail.get('width')),
1730 })
1731 if thumbnails:
1732 break
a6211d23 1733 else:
545cc85d 1734 thumbnail = search_meta(['og:image', 'twitter:image'])
1735 if thumbnail:
1736 thumbnails = [{'url': thumbnail}]
1737
1738 category = microformat.get('category') or search_meta('genre')
1739 channel_id = video_details.get('channelId') \
1740 or microformat.get('externalChannelId') \
1741 or search_meta('channelId')
1742 duration = int_or_none(
1743 video_details.get('lengthSeconds')
1744 or microformat.get('lengthSeconds')) \
1745 or parse_duration(search_meta('duration'))
1746 is_live = video_details.get('isLive')
1747 owner_profile_url = microformat.get('ownerProfileUrl')
1748
1749 info = {
1750 'id': video_id,
1751 'title': self._live_title(video_title) if is_live else video_title,
1752 'formats': formats,
1753 'thumbnails': thumbnails,
1754 'description': video_description,
1755 'upload_date': unified_strdate(
1756 microformat.get('uploadDate')
1757 or search_meta('uploadDate')),
1758 'uploader': video_details['author'],
1759 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1760 'uploader_url': owner_profile_url,
1761 'channel_id': channel_id,
1762 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1763 'duration': duration,
1764 'view_count': int_or_none(
1765 video_details.get('viewCount')
1766 or microformat.get('viewCount')
1767 or search_meta('interactionCount')),
1768 'average_rating': float_or_none(video_details.get('averageRating')),
1769 'age_limit': 18 if (
1770 microformat.get('isFamilySafe') is False
1771 or search_meta('isFamilyFriendly') == 'false'
1772 or search_meta('og:restrictions:age') == '18+') else 0,
1773 'webpage_url': webpage_url,
1774 'categories': [category] if category else None,
1775 'tags': keywords,
1776 'is_live': is_live,
1777 'playable_in_embed': playability_status.get('playableInEmbed'),
f76ede8e 1778 'was_live': video_details.get('isLiveContent')
545cc85d 1779 }
b477fc13 1780
545cc85d 1781 pctr = try_get(
1782 player_response,
1783 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1784 subtitles = {}
1785 if pctr:
def process_language(container, base_url, lang_code, query):
    """Store one subtitle entry per supported format in ``container[lang_code]``.

    For every format in ``self._SUBTITLE_FORMATS`` an entry with ``ext`` and
    ``url`` is built, where the URL is *base_url* updated with *query* plus
    ``fmt=<format>``. *query* itself is left untouched.
    """
    container[lang_code] = [{
        'ext': fmt,
        # Build a per-format copy so the caller's dict is never mutated
        'url': update_url_query(base_url, dict(query, fmt=fmt)),
    } for fmt in self._SUBTITLE_FORMATS]
7e72694b 1797
545cc85d 1798 for caption_track in (pctr.get('captionTracks') or []):
1799 base_url = caption_track.get('baseUrl')
1800 if not base_url:
1801 continue
1802 if caption_track.get('kind') != 'asr':
1803 lang_code = caption_track.get('languageCode')
1804 if not lang_code:
1805 continue
1806 process_language(
1807 subtitles, base_url, lang_code, {})
1808 continue
1809 automatic_captions = {}
1810 for translation_language in (pctr.get('translationLanguages') or []):
1811 translation_language_code = translation_language.get('languageCode')
1812 if not translation_language_code:
1813 continue
1814 process_language(
1815 automatic_captions, base_url, translation_language_code,
1816 {'tlang': translation_language_code})
1817 info['automatic_captions'] = automatic_captions
1818 info['subtitles'] = subtitles
7e72694b 1819
545cc85d 1820 parsed_url = compat_urllib_parse_urlparse(url)
1821 for component in [parsed_url.fragment, parsed_url.query]:
1822 query = compat_parse_qs(component)
1823 for k, v in query.items():
1824 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1825 d_k += '_time'
1826 if d_k not in info and k in s_ks:
1827 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1828
1829 # Youtube Music Auto-generated description
822b9d9c 1830 if video_description:
38d70284 1831 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1832 if mobj:
822b9d9c
RA
1833 release_year = mobj.group('release_year')
1834 release_date = mobj.group('release_date')
1835 if release_date:
1836 release_date = release_date.replace('-', '')
1837 if not release_year:
545cc85d 1838 release_year = release_date[:4]
1839 info.update({
1840 'album': mobj.group('album'.strip()),
1841 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1842 'track': mobj.group('track').strip(),
1843 'release_date': release_date,
cc2db878 1844 'release_year': int_or_none(release_year),
545cc85d 1845 })
7e72694b 1846
545cc85d 1847 initial_data = None
1848 if webpage:
1849 initial_data = self._extract_yt_initial_variable(
1850 webpage, self._YT_INITIAL_DATA_RE, video_id,
1851 'yt initial data')
1852 if not initial_data:
1853 initial_data = self._call_api(
1854 'next', {'videoId': video_id}, video_id, fatal=False)
1855
1856 if not is_live:
1857 try:
1858 # This will error if there is no livechat
1859 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1860 info['subtitles']['live_chat'] = [{
1861 'video_id': video_id,
1862 'ext': 'json',
1863 'protocol': 'youtube_live_chat_replay',
1864 }]
1865 except (KeyError, IndexError, TypeError):
1866 pass
1867
1868 if initial_data:
1869 chapters = self._extract_chapters_from_json(
1870 initial_data, video_id, duration)
1871 if not chapters:
1872 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1873 contents = try_get(
1874 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1875 list)
1876 if not contents:
1877 continue
1878
def chapter_time(mmlir):
    """Return the time parsed from a marker renderer's ``timeDescription``.

    *mmlir* may be ``None``: the caller looks the following marker up with
    ``try_get`` and passes the result straight in. That case is handled
    like a renderer without a time description instead of raising
    AttributeError. Presumably ``parse_duration`` then returns ``None`` --
    the caller skips chapters whose start or end time is ``None``.
    """
    return parse_duration(
        get_text((mmlir or {}).get('timeDescription')))
1882
1883 chapters = []
1884 for next_num, content in enumerate(contents, start=1):
1885 mmlir = content.get('macroMarkersListItemRenderer') or {}
1886 start_time = chapter_time(mmlir)
1887 end_time = chapter_time(try_get(
1888 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1889 if next_num < len(contents) else duration
1890 if start_time is None or end_time is None:
1891 continue
1892 chapters.append({
1893 'start_time': start_time,
1894 'end_time': end_time,
1895 'title': get_text(mmlir.get('title')),
1896 })
1897 if chapters:
1898 break
1899 if chapters:
1900 info['chapters'] = chapters
1901
1902 contents = try_get(
1903 initial_data,
1904 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1905 list) or []
1906 for content in contents:
1907 vpir = content.get('videoPrimaryInfoRenderer')
1908 if vpir:
1909 stl = vpir.get('superTitleLink')
1910 if stl:
1911 stl = get_text(stl)
1912 if try_get(
1913 vpir,
1914 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1915 info['location'] = stl
1916 else:
1917 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1918 if mobj:
1919 info.update({
1920 'series': mobj.group(1),
1921 'season_number': int(mobj.group(2)),
1922 'episode_number': int(mobj.group(3)),
1923 })
1924 for tlb in (try_get(
1925 vpir,
1926 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1927 list) or []):
1928 tbr = tlb.get('toggleButtonRenderer') or {}
1929 for getter, regex in [(
1930 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1931 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1932 lambda x: x['accessibility'],
1933 lambda x: x['accessibilityData']['accessibilityData'],
1934 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1935 label = (try_get(tbr, getter, dict) or {}).get('label')
1936 if label:
1937 mobj = re.match(regex, label)
1938 if mobj:
1939 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1940 break
1941 sbr_tooltip = try_get(
1942 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1943 if sbr_tooltip:
1944 like_count, dislike_count = sbr_tooltip.split(' / ')
1945 info.update({
1946 'like_count': str_to_int(like_count),
1947 'dislike_count': str_to_int(dislike_count),
1948 })
1949 vsir = content.get('videoSecondaryInfoRenderer')
1950 if vsir:
1951 info['channel'] = get_text(try_get(
1952 vsir,
1953 lambda x: x['owner']['videoOwnerRenderer']['title'],
1954 compat_str))
1955 rows = try_get(
1956 vsir,
1957 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1958 list) or []
1959 multiple_songs = False
1960 for row in rows:
1961 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1962 multiple_songs = True
1963 break
1964 for row in rows:
1965 mrr = row.get('metadataRowRenderer') or {}
1966 mrr_title = mrr.get('title')
1967 if not mrr_title:
1968 continue
1969 mrr_title = get_text(mrr['title'])
1970 mrr_contents_text = get_text(mrr['contents'][0])
1971 if mrr_title == 'License':
1972 info['license'] = mrr_contents_text
1973 elif not multiple_songs:
1974 if mrr_title == 'Album':
1975 info['album'] = mrr_contents_text
1976 elif mrr_title == 'Artist':
1977 info['artist'] = mrr_contents_text
1978 elif mrr_title == 'Song':
1979 info['track'] = mrr_contents_text
1980
1981 fallbacks = {
1982 'channel': 'uploader',
1983 'channel_id': 'uploader_id',
1984 'channel_url': 'uploader_url',
1985 }
1986 for to, frm in fallbacks.items():
1987 if not info.get(to):
1988 info[to] = info.get(frm)
1989
1990 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1991 v = info.get(s_k)
1992 if v:
1993 info[d_k] = v
b84071c0 1994
06167fbb 1995 # get xsrf for annotations or comments
1996 get_annotations = self._downloader.params.get('writeannotations', False)
1997 get_comments = self._downloader.params.get('getcomments', False)
1998 if get_annotations or get_comments:
29f7c58a 1999 xsrf_token = None
545cc85d 2000 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2001 if ytcfg:
2002 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2003 if not xsrf_token:
2004 xsrf_token = self._search_regex(
2005 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2006 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2007
2008 # annotations
06167fbb 2009 if get_annotations:
64b6a4e9
RA
2010 invideo_url = try_get(
2011 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2012 if xsrf_token and invideo_url:
29f7c58a 2013 xsrf_field_name = None
2014 if ytcfg:
2015 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2016 if not xsrf_field_name:
2017 xsrf_field_name = self._search_regex(
2018 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2019 webpage, 'xsrf field name',
29f7c58a 2020 group='xsrf_field_name', default='session_token')
8a784c74 2021 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2022 self._proto_relative_url(invideo_url),
2023 video_id, note='Downloading annotations',
2024 errnote='Unable to download video annotations', fatal=False,
2025 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2026
06167fbb 2027 # Get comments
2028 # TODO: Refactor and move to separate function
277d6ff5 2029 def extract_comments():
06167fbb 2030 expected_video_comment_count = 0
2031 video_comments = []
277d6ff5 2032 comment_xsrf = xsrf_token
06167fbb 2033
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text that follows *key* in *html*, up to *separator*.

    The value starts *num_chars* characters after the end of the first
    occurrence of *key* and runs to the next *separator*, or to the end of
    *html* if there is none. Returns ``None`` when *key* does not occur.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find() reports a miss as -1; using it as an index would
        # return unrelated text from the start of the document
        return None
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # No closing separator: keep the remainder rather than silently
        # dropping the last character via a -1 slice bound
        pos_end = len(html)
    return html[pos_begin:pos_end]
2038
def search_dict(partial, key):
    """Lazily yield every value stored under *key* in a nested structure.

    Dicts and lists are walked depth-first in iteration order. A value found
    under *key* is yielded as-is and is not searched any further; anything
    that is neither a dict nor a list produces nothing.
    """
    if isinstance(partial, dict):
        for name, value in partial.items():
            if name != key:
                # Not a match at this level: look inside the value instead
                for match in search_dict(value, key):
                    yield match
                continue
            yield value
        return
    if isinstance(partial, list):
        for element in partial:
            for match in search_dict(element, key):
                yield match
2051
8a784c74 2052 continuations = []
2053 if initial_data:
2054 try:
2055 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2056 continuations = [ncd['continuation']]
2057 # Handle videos where comments have been disabled entirely
2058 except StopIteration:
2059 pass
06167fbb 2060
def get_continuation(continuation, session_token, replies=False):
    """Fetch one page of comments (or comment replies) for a continuation token.

    POSTs *session_token* to the comment service with *continuation* as the
    ``ctoken`` query parameter; *replies* selects the replies action instead
    of the top-level comments action.

    Returns the parsed JSON body on HTTP 200 and ``None`` on HTTP 413 (which
    is passed as an expected status). Raises ExtractorError for any other
    status code.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    query = {
        'pbj': 1,
        'ctoken': continuation,
    }
    query[action] = 1

    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01'
    }
    post_data = urlencode_postdata({
        'session_token': session_token
    })

    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=post_data,
        query=query,
        headers=request_headers)

    status = handle.getcode()
    if status == 413:
        return None
    if status != 200:
        raise ExtractorError('Unexpected HTTP error code: %s' % status)
    return self._parse_json(content, video_id)
2095
2096 first_continuation = True
885d36d4 2097 chain_msg = ''
2098 self.to_screen('Downloading comments')
06167fbb 2099 while continuations:
885d36d4 2100 continuation = continuations.pop()
277d6ff5 2101 comment_response = get_continuation(continuation, comment_xsrf)
06167fbb 2102 if not comment_response:
2103 continue
2104 if list(search_dict(comment_response, 'externalErrorMessage')):
2105 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2106
8d0ea5f9
B
2107 if 'continuationContents' not in comment_response['response']:
2108 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2109 continue
2110 # not sure if this actually helps
2111 if 'xsrf_token' in comment_response:
277d6ff5 2112 comment_xsrf = comment_response['xsrf_token']
8d0ea5f9 2113
06167fbb 2114 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2115 if first_continuation:
2116 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2117 first_continuation = False
2118 if 'contents' not in item_section:
2119 # continuation returned no comments?
2120 # set an empty array as to not break the for loop
2121 item_section['contents'] = []
2122
2123 for meta_comment in item_section['contents']:
2124 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2125 video_comments.append({
2126 'id': comment['commentId'],
ba7bf12d 2127 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2128 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2129 'author': comment.get('authorText', {}).get('simpleText', ''),
2130 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2131 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2132 'parent': 'root'
2133 })
2134 if 'replies' not in meta_comment['commentThreadRenderer']:
2135 continue
2136
8d0ea5f9
B
2137 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2138 while reply_continuations:
06167fbb 2139 time.sleep(1)
8d0ea5f9 2140 continuation = reply_continuations.pop()
277d6ff5 2141 replies_data = get_continuation(continuation, comment_xsrf, True)
06167fbb 2142 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2143 continue
06167fbb 2144
2145 if self._downloader.params.get('verbose', False):
885d36d4 2146 chain_msg = ' (chain %s)' % comment['commentId']
2147 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2148 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2149 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2150 reply_comment = reply_meta['commentRenderer']
2151 video_comments.append({
2152 'id': reply_comment['commentId'],
2153 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2154 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2155 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2156 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2157 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2158 'parent': comment['commentId']
2159 })
2160 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2161 continue
8d0ea5f9 2162 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2163
885d36d4 2164 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2165 if 'continuations' in item_section:
8d0ea5f9 2166 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2167 time.sleep(1)
2168
885d36d4 2169 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
277d6ff5 2170 return {
545cc85d 2171 'comments': video_comments,
2172 'comment_count': expected_video_comment_count
277d6ff5 2173 }
2174
2175 if get_comments:
2176 info['__post_extractor'] = extract_comments
4ea3be0a 2177
545cc85d 2178 self.mark_watched(video_id, player_response)
d77ab8e2 2179
545cc85d 2180 return info
c5e8d7af 2181
5f6a1245 2182
8bdd16b4 2183class YoutubeTabIE(YoutubeBaseInfoExtractor):
2184 IE_DESC = 'YouTube.com tab'
70d5c17b 2185 _VALID_URL = r'''(?x)
2186 https?://
2187 (?:\w+\.)?
2188 (?:
2189 youtube(?:kids)?\.com|
2190 invidio\.us
2191 )/
2192 (?:
2193 (?:channel|c|user)/|
2194 (?P<not_channel>
9ba5705a 2195 feed/|hashtag/|
70d5c17b 2196 (?:playlist|watch)\?.*?\blist=
2197 )|
29f7c58a 2198 (?!(?:%s)\b) # Direct URLs
70d5c17b 2199 )
2200 (?P<id>[^/?\#&]+)
2201 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2202 IE_NAME = 'youtube:tab'
2203
81127aa5 2204 _TESTS = [{
8bdd16b4 2205 # playlists, multipage
2206 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2207 'playlist_mincount': 94,
2208 'info_dict': {
2209 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2210 'title': 'Игорь Клейнер - Playlists',
2211 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2212 'uploader': 'Игорь Клейнер',
2213 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2214 },
2215 }, {
2216 # playlists, multipage, different order
2217 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2218 'playlist_mincount': 94,
2219 'info_dict': {
2220 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2221 'title': 'Игорь Клейнер - Playlists',
2222 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2223 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2224 'uploader': 'Игорь Клейнер',
8bdd16b4 2225 },
2226 }, {
2227 # playlists, singlepage
2228 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2229 'playlist_mincount': 4,
2230 'info_dict': {
2231 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2232 'title': 'ThirstForScience - Playlists',
2233 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2234 'uploader': 'ThirstForScience',
2235 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2236 }
2237 }, {
2238 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2239 'only_matching': True,
2240 }, {
2241 # basic, single video playlist
0e30a7b9 2242 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2243 'info_dict': {
0e30a7b9 2244 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2245 'uploader': 'Sergey M.',
2246 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2247 'title': 'youtube-dl public playlist',
81127aa5 2248 },
0e30a7b9 2249 'playlist_count': 1,
9291475f 2250 }, {
8bdd16b4 2251 # empty playlist
0e30a7b9 2252 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2253 'info_dict': {
0e30a7b9 2254 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2255 'uploader': 'Sergey M.',
2256 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2257 'title': 'youtube-dl empty playlist',
9291475f
PH
2258 },
2259 'playlist_count': 0,
2260 }, {
8bdd16b4 2261 # Home tab
2262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2263 'info_dict': {
8bdd16b4 2264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2265 'title': 'lex will - Home',
2266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2267 'uploader': 'lex will',
2268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2269 },
8bdd16b4 2270 'playlist_mincount': 2,
9291475f 2271 }, {
8bdd16b4 2272 # Videos tab
2273 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2274 'info_dict': {
8bdd16b4 2275 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2276 'title': 'lex will - Videos',
2277 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2278 'uploader': 'lex will',
2279 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2280 },
8bdd16b4 2281 'playlist_mincount': 975,
9291475f 2282 }, {
8bdd16b4 2283 # Videos tab, sorted by popular
2284 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2285 'info_dict': {
8bdd16b4 2286 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2287 'title': 'lex will - Videos',
2288 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2289 'uploader': 'lex will',
2290 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2291 },
8bdd16b4 2292 'playlist_mincount': 199,
9291475f 2293 }, {
8bdd16b4 2294 # Playlists tab
2295 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2296 'info_dict': {
8bdd16b4 2297 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2298 'title': 'lex will - Playlists',
2299 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2300 'uploader': 'lex will',
2301 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2302 },
8bdd16b4 2303 'playlist_mincount': 17,
ac7553d0 2304 }, {
8bdd16b4 2305 # Community tab
2306 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2307 'info_dict': {
8bdd16b4 2308 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2309 'title': 'lex will - Community',
2310 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2311 'uploader': 'lex will',
2312 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2313 },
2314 'playlist_mincount': 18,
87dadd45 2315 }, {
8bdd16b4 2316 # Channels tab
2317 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2318 'info_dict': {
8bdd16b4 2319 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2320 'title': 'lex will - Channels',
2321 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2322 'uploader': 'lex will',
2323 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2324 },
deaec5af 2325 'playlist_mincount': 12,
6b08cdf6 2326 }, {
a0566bbf 2327 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2328 'only_matching': True,
2329 }, {
a0566bbf 2330 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2331 'only_matching': True,
2332 }, {
a0566bbf 2333 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2334 'only_matching': True,
2335 }, {
2336 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2337 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2338 'info_dict': {
2339 'title': '29C3: Not my department',
2340 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2341 'uploader': 'Christiaan008',
2342 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2343 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2344 },
2345 'playlist_count': 96,
2346 }, {
2347 'note': 'Large playlist',
2348 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2349 'info_dict': {
8bdd16b4 2350 'title': 'Uploads from Cauchemar',
2351 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2352 'uploader': 'Cauchemar',
2353 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2354 },
8bdd16b4 2355 'playlist_mincount': 1123,
2356 }, {
2357 # even larger playlist, 8832 videos
2358 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2359 'only_matching': True,
4b7df0d3
JMF
2360 }, {
2361 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2362 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2363 'info_dict': {
acf757f4
PH
2364 'title': 'Uploads from Interstellar Movie',
2365 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2366 'uploader': 'Interstellar Movie',
8bdd16b4 2367 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2368 },
481cc733 2369 'playlist_mincount': 21,
8bdd16b4 2370 }, {
2371 # https://github.com/ytdl-org/youtube-dl/issues/21844
2372 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2373 'info_dict': {
2374 'title': 'Data Analysis with Dr Mike Pound',
2375 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2376 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2377 'uploader': 'Computerphile',
deaec5af 2378 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2379 },
2380 'playlist_mincount': 11,
2381 }, {
a0566bbf 2382 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2383 'only_matching': True,
dacb3a86
S
2384 }, {
2385 # Playlist URL that does not actually serve a playlist
2386 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2387 'info_dict': {
2388 'id': 'FqZTN594JQw',
2389 'ext': 'webm',
2390 'title': "Smiley's People 01 detective, Adventure Series, Action",
2391 'uploader': 'STREEM',
2392 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2394 'upload_date': '20150526',
2395 'license': 'Standard YouTube License',
2396 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2397 'categories': ['People & Blogs'],
2398 'tags': list,
dbdaaa23 2399 'view_count': int,
dacb3a86
S
2400 'like_count': int,
2401 'dislike_count': int,
2402 },
2403 'params': {
2404 'skip_download': True,
2405 },
13a75688 2406 'skip': 'This video is not available.',
dacb3a86 2407 'add_ie': [YoutubeIE.ie_key()],
481cc733 2408 }, {
8bdd16b4 2409 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2410 'only_matching': True,
66b48727 2411 }, {
8bdd16b4 2412 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2413 'only_matching': True,
a0566bbf 2414 }, {
2415 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2416 'info_dict': {
2417 'id': '9Auq9mYxFEE',
2418 'ext': 'mp4',
deaec5af 2419 'title': compat_str,
a0566bbf 2420 'uploader': 'Sky News',
2421 'uploader_id': 'skynews',
2422 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2423 'upload_date': '20191102',
deaec5af 2424 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2425 'categories': ['News & Politics'],
2426 'tags': list,
2427 'like_count': int,
2428 'dislike_count': int,
2429 },
2430 'params': {
2431 'skip_download': True,
2432 },
2433 }, {
2434 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2435 'info_dict': {
2436 'id': 'a48o2S1cPoo',
2437 'ext': 'mp4',
2438 'title': 'The Young Turks - Live Main Show',
2439 'uploader': 'The Young Turks',
2440 'uploader_id': 'TheYoungTurks',
2441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2442 'upload_date': '20150715',
2443 'license': 'Standard YouTube License',
2444 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2445 'categories': ['News & Politics'],
2446 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2447 'like_count': int,
2448 'dislike_count': int,
2449 },
2450 'params': {
2451 'skip_download': True,
2452 },
2453 'only_matching': True,
2454 }, {
2455 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2456 'only_matching': True,
2457 }, {
2458 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2459 'only_matching': True,
3d3dddc9 2460 }, {
2461 'url': 'https://www.youtube.com/feed/trending',
2462 'only_matching': True,
2463 }, {
2464 # needs auth
2465 'url': 'https://www.youtube.com/feed/library',
2466 'only_matching': True,
2467 }, {
2468 # needs auth
2469 'url': 'https://www.youtube.com/feed/history',
2470 'only_matching': True,
2471 }, {
2472 # needs auth
2473 'url': 'https://www.youtube.com/feed/subscriptions',
2474 'only_matching': True,
2475 }, {
2476 # needs auth
2477 'url': 'https://www.youtube.com/feed/watch_later',
2478 'only_matching': True,
2479 }, {
2480 # no longer available?
2481 'url': 'https://www.youtube.com/feed/recommended',
2482 'only_matching': True,
29f7c58a 2483 }, {
2484 # inline playlist with not always working continuations
2485 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2486 'only_matching': True,
2487 }, {
2488 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2489 'only_matching': True,
2490 }, {
2491 'url': 'https://www.youtube.com/course',
2492 'only_matching': True,
2493 }, {
2494 'url': 'https://www.youtube.com/zsecurity',
2495 'only_matching': True,
2496 }, {
2497 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2498 'only_matching': True,
2499 }, {
2500 'url': 'https://www.youtube.com/TheYoungTurks/live',
2501 'only_matching': True,
2502 }]
2503
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that YoutubeIE accepts is rejected here so that it is left to
    YoutubeIE; everything else is decided by the inherited check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2508
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by the meta tags of ``webpage``.

    The ``channelId`` meta tag is used when present. Otherwise the ID is
    parsed out of the channel URL found in one of the canonical-URL meta
    tags (``og:url``, ``al:*:url``, ``twitter:*``).
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the capturing group: with
    # '([^/?#&])+' the group is repeated and only its last iteration
    # (a single character) would be returned.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2521
8bdd16b4 2522 @staticmethod
cd7c66cf 2523 def _extract_basic_item_renderer(item):
2524 # Modified from _extract_grid_item_renderer
2525 known_renderers = (
2526 'playlistRenderer', 'videoRenderer', 'channelRenderer'
2527 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2528 )
2529 for key, renderer in item.items():
2530 if key not in known_renderers:
2531 continue
2532 return renderer
8bdd16b4 2533
def _grid_entries(self, grid_renderer):
    """Yield entries for the playlists, videos and channels of a grid.

    Every dict in ``grid_renderer['items']`` is unwrapped to its basic item
    renderer. One renderer may produce several entries, because the
    playlist, video and channel checks are independent of each other.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue

        # playlist
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            playlist_title = try_get(
                item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=playlist_title)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel
        channel_id = item_renderer.get('channelId')
        if channel_id:
            channel_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
2562
3d3dddc9 2563 def _shelf_entries_from_content(self, shelf_renderer):
2564 content = shelf_renderer.get('content')
2565 if not isinstance(content, dict):
8bdd16b4 2566 return
cd7c66cf 2567 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2568 if renderer:
2569 # TODO: add support for nested playlists so each shelf is processed
2570 # as separate playlist
2571 # TODO: this includes only first N items
2572 for entry in self._grid_entries(renderer):
2573 yield entry
2574 renderer = content.get('horizontalListRenderer')
2575 if renderer:
2576 # TODO
2577 pass
8bdd16b4 2578
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf has an endpoint URL, a URL result pointing at it is
    yielded first. The entries embedded in the shelf content are yielded
    afterwards. With ``skip_channels`` set, a shelf whose URL contains
    '/channels?' yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2596
8bdd16b4 2597 def _playlist_entries(self, video_list_renderer):
2598 for content in video_list_renderer['contents']:
2599 if not isinstance(content, dict):
2600 continue
2601 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2602 if not isinstance(renderer, dict):
2603 continue
2604 video_id = renderer.get('videoId')
2605 if not video_id:
2606 continue
2607 yield self._extract_video(renderer)
07aeced6 2608
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry wrapped in a rich item renderer, if there is one."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2616
8bdd16b4 2617 def _video_entry(self, video_renderer):
2618 video_id = video_renderer.get('videoId')
2619 if video_id:
2620 return self._extract_video(video_renderer)
dacb3a86 2621
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The video attached to the post (if any) is yielded first, followed by
    a URL result for every inline link in the post text that YoutubeIE can
    handle. A link pointing at the attached video is not yielded again.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attached video's ID so that inline links to the same
    # video can be recognised as duplicates below
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # The result must carry the ID of the linked video, not the ID of
        # the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2650
8bdd16b4 2651 def _post_thread_continuation_entries(self, post_thread_continuation):
2652 contents = post_thread_continuation.get('contents')
2653 if not isinstance(contents, list):
2654 return
2655 for content in contents:
2656 renderer = content.get('backstagePostThreadRenderer')
2657 if not isinstance(renderer, dict):
2658 continue
2659 for entry in self._post_thread_entries(renderer):
2660 yield entry
07aeced6 2661
29f7c58a 2662 @staticmethod
2663 def _build_continuation_query(continuation, ctp=None):
2664 query = {
2665 'ctoken': continuation,
2666 'continuation': continuation,
2667 }
2668 if ctp:
2669 query['itct'] = ctp
2670 return query
2671
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query stored in ``nextContinuationData``.

    None is returned when the renderer has no such data or the data has
    no continuation token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2683
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query of ``renderer``, or None.

    ``nextContinuationData`` takes precedence. Otherwise the renderer's
    'contents' and 'items' lists are searched, in that order, for the
    first ``continuationItemRenderer`` that carries a token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2706
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries embedded in the tab content are yielded first. As long as
    a continuation is found, further pages are requested from the "browse"
    API endpoint and their entries are yielded as well.

    ``identity_token`` and ``account_syncid`` are sent as request headers
    when they are set. ``item_id`` is only used to label the requests.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries() writes
    # the continuation into (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    for page_num in itertools.count(1):
        if not continuation:
            break

        # 'itct' is present only when the continuation came with click
        # tracking params, so it must not be indexed unconditionally
        query = {'continuation': continuation['continuation']}
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # First response format: the entries are wrapped in 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the cell so that a stale continuation is never reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response format: the entries are appended through an
        # 'appendContinuationItemsAction'. The kind of the first item
        # decides how the whole item list is processed.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        continuation_items = try_get(
            response,
            lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 2861
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when none of ``tabs`` is selected.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 2869
@staticmethod
def _extract_uploader(data):
    """Return the uploader fields found in the playlist sidebar of ``data``.

    The result may hold 'uploader', 'uploader_id' and 'uploader_url';
    fields whose value is None are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
        })
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 2892
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab-based page (channel or playlist).

    Metadata is read from the channel metadata renderer, falling back to
    the playlist metadata renderer. When no channel ID is found this way,
    the uploader is looked up in the playlist sidebar. The entries come
    from the selected tab.
    """
    title = description = playlist_id = None
    channel_name = channel_url = channel_id = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only the thumbnails that come with a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        if not isinstance(thumb, dict):
            continue
        thumb_url = url_or_none(thumb.get('url'))
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid)
    return self.playlist_result(entries, **metadata)
73c4ac2c 2962
cd7c66cf 2963 def _extract_mix_playlist(self, playlist, playlist_id):
2be71994 2964 first_id = last_id = None
2965 for page_num in itertools.count(1):
cd7c66cf 2966 videos = list(self._playlist_entries(playlist))
2967 if not videos:
2968 return
2be71994 2969 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
2970 if start >= len(videos):
2971 return
2972 for video in videos[start:]:
2973 if video['id'] == first_id:
2974 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
2975 return
2976 yield video
2977 first_id = first_id or videos[0]['id']
2978 last_id = videos[-1]['id']
cd7c66cf 2979
cd7c66cf 2980 _, data = self._extract_webpage(
2be71994 2981 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
cd7c66cf 2982 '%s page %d' % (playlist_id, page_num))
2983 playlist = try_get(
2984 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
2985
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Build the result for a playlist embedded in a watch page.

    A playlist that links to a URL different from ``url`` is delegated to
    that URL. Anything else is extracted here as a mix playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3003
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data``.

    Alerts whose type is not 'error' are reported as warnings. When there
    are error alerts, all but the last one are reported as warnings and
    the last one is raised as ExtractorError, with ``expected`` forwarded
    to the exception.
    """

    def iter_alerts():
        # Yields one (type, message) pair per text found in each alert
        for alert_group in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_group, dict):
                continue
            for alert in alert_group.values():
                kind = alert.get('type')
                if not kind:
                    continue
                texts = [try_get(alert, lambda x: x['text']['simpleText'], compat_str)]
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    texts.append(try_get(run, lambda x: x['text'], compat_str))
                for text in texts:
                    if text:
                        yield kind, text

    pending_error = None
    for alert_type, alert_message in iter_alerts():
        if alert_type.lower() != 'error':
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
            continue
        if pending_error:
            # Only the last error is raised, earlier ones are just reported
            self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', pending_error))
        pending_error = alert_message

    if pending_error:
        raise ExtractorError('YouTube said: %s' % pending_error, expected=expected)
02ced43c 3033
def _extract_identity_token(self, webpage, item_id):
    """Return the identity token of ``webpage``, or None when there is none.

    The ``ID_TOKEN`` value of the ytcfg is preferred; a regex search over
    the raw webpage serves as fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3043
d069eca7
M
@staticmethod
def _extract_account_syncid(data):
    """Extract syncId required to download private playlists of secondary channels"""
    datasync_id = try_get(
        data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        compat_str) or ''
    parts = datasync_id.split("||")
    # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
    # and just "user_syncid||" for primary channel. We only want the channel_syncid
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[0]
3054
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the webpage. The download
    is repeated, up to the 'extractor_retries' param (default 3) times,
    while the data has neither 'contents' nor 'currentVideoEndpoint'.
    An error is reported once the retries are used up.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    failure_message = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % failure_message)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        is_complete = data.get('contents') or data.get('currentVideoEndpoint')
        if is_complete:
            break
        if attempt >= max_retries:
            self._downloader.report_error(failure_message)
    return webpage, data
3075
def _real_extract(self, url):
    """Extract a tab page, a playlist or a single video from ``url``.

    The URL is normalized to www.youtube.com first. Bare channel/user
    URLs are redirected to their videos tab, and watch URLs without a
    video ID are redirected to the playlist they name. The downloaded page
    is then handled as tabs, as a watch-page playlist or as a single
    video, whichever is found first.
    """
    item_id = self._match_id(url)
    normalized = compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')
    url = compat_urlparse.urlunparse(normalized)

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = match.groupdict() if match else {}
    if url_parts and not url_parts.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    is_watch_url = (url_parts.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3129
c5e8d7af 3130
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and URLs carrying a ``list=`` parameter.

    No extraction happens here: the input is rewritten to a
    ``https://www.youtube.com/playlist`` URL and handed to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    # The host/path prefix is optional so that a bare playlist ID matches too.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for anything YoutubeTabIE accepts; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the playlist page, carrying over the input's query string if it has one."""
        playlist_id = self._match_id(url)
        # A bare ID has no query string; synthesise the minimal one in that case.
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3205
3206
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist ID.

    The link is rewritten to a full watch URL (video plus playlist) and
    passed on to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist ID, not the video ID.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3245
3246
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the matching /user/ page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3260
b05654f0 3261
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` shorthand (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the ``list=LL`` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3279
3280
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword; pages through the ``search`` API endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # No practical upper bound appears to exist: a search for 'python',
    # for instance, reports more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted video results for *query*, stopping once *n* have been produced.

        Pages are requested one after another until the API returns nothing
        usable, no continuation token is found, or *n* results were yielded.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=request)
            # The first page and continuation pages keep their results
            # under different paths; try both.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list) if response else None
            if not sections:
                return

            # Promoted content may shift where the videos and the
            # continuation token sit, so every section is inspected.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer with an ID.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 3349
c9ae7b95 3350
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3356
c9ae7b95 3357
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL instead of a keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the optional ``sp`` filter from the URL, then run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = url_params.get('search_query') or url_params.get('q')
        search_text = terms[0]
        # An absent 'sp' becomes '', which _entries treats as "no params".
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(search_text, self._MAX_RESULTS)
3462ffa8 3383
3384
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide _FEED_NAME; it is used both for the
    extractor name and for the /feed/ URL that gets extracted.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name built from the feed name."""
        return 'youtube:%s' % (self._FEED_NAME,)

    def _real_initialize(self):
        """Log in during initialisation."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed page for _FEED_NAME to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % (self._FEED_NAME,)
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3404
3405
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shorthand to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the ``list=WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3418
3419
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches the bare youtube.com home URL and ``:ytrec``."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3434
1ed5b5c9 3435
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', reached through the ``:ytsubs`` shorthand."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3447
1ed5b5c9 3448
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', reached through the ``:ythis`` / ``:ythistory`` shorthand."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3457
3458
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID.

    The pattern only matches URLs whose query string stops after at most one
    of a few known non-video parameters. Extraction always fails with an
    explanatory error rather than attempting a download.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise ExtractorError (expected=True) explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally.
        """
        # The example commands name this project's executable (yt-dlp)
        # so that users can copy them verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3506
3507
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise ExtractorError (expected=True) naming the incomplete ID and the URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)