]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Fix history, trending and mix playlists (#136)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c5e8d7af 28 clean_html,
c5e8d7af 29 ExtractorError,
b60419c5 30 format_field,
2d30521a 31 float_or_none,
dd27fd17 32 int_or_none,
94278f72 33 mimetype2ext,
6310acf5 34 parse_codecs,
7c80519c 35 parse_duration,
dca3ff4a 36 qualities,
3995d37d 37 remove_start,
cf7e015f 38 smuggle_url,
dbdaaa23 39 str_or_none,
c93d53f5 40 str_to_int,
556dbe7f 41 try_get,
c5e8d7af
PH
42 unescapeHTML,
43 unified_strdate,
cf7e015f 44 unsmuggle_url,
8bdd16b4 45 update_url_query,
21c340b8 46 url_or_none,
6e6bc8da 47 urlencode_postdata,
8bdd16b4 48 urljoin,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of reserved path names
    # NOTE(review): not referenced inside this class - presumably consumed by
    # URL patterns of other extractors in this file; confirm against callers
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Wrap every video id of `ids` into a url_result handled by the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST `f_req` (JSON-encoded) together with the login form fields; return parsed JSON or False."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same sign-in "continue" URL is embedded in both the lookup and
        # the challenge request, so it is spelled out only once
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than
            # 'if ... else', otherwise the prefix is lost for every other message
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with a private copy of the `query` keyword argument."""
        # `or {}` also covers an explicit query=None, which has no .copy()
        kwargs['query'] = dict(kwargs.get('query') or {})
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Attempt to log in once a downloader is attached."""
        if self._downloader is None:
            return
        # The result is deliberately ignored: a failed login is not treated
        # as fatal here (only exceptions raised by _login() propagate)
        self._login()

    # Base JSON payload; _call_api() merges the caller's query into a copy of it
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210301.08.00',
            }
        },
    }

    # Patterns used to locate JSON blobs embedded in a webpage
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the value of the 'Authorization' header from the SAPISID cookie.

        Returns 'SAPISIDHASH <time>_<sha1 hex digest>' or None if there is
        no SAPISID cookie for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        # int() keeps the timestamp free of a trailing '.0' on interpreters
        # where round() returns a float (Python 2)
        time_now = int(round(time.time()))
        sapisidhash = hashlib.sha1(
            ('%s %s %s' % (time_now, sapisid_cookie.value, 'https://www.youtube.com')).encode('utf-8')).hexdigest()
        return 'SAPISIDHASH %s_%s' % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """
        POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed response.

        `query` is merged (top-level keys only) into a copy of _DEFAULT_API_DATA.
        `headers` are optional extra headers; the passed dict is not modified.
        The remaining arguments are handed to _download_json unchanged.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        # Copy first so the caller's dict is never mutated
        headers = dict(headers or {})
        headers.update({'content-type': 'application/json'})
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})

        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=headers,
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in `webpage` and return it parsed."""
        return self._parse_json(
            self._search_regex(
                # First try the pattern anchored by a trailing boundary,
                # then the bare pattern
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the argument of the ytcfg.set({...}) call in `webpage`; '{}' is parsed if none is found."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """
        Turn a video renderer dict into a 'url_transparent' result for YoutubeIE.

        Fields missing from `renderer` end up as None in the result.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Only the leading run of digits/commas is used, after whitespace removal
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
355
0c148415 356
360e1ca5 357class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 358 IE_DESC = 'YouTube.com'
bc2ca1bb 359 _INVIDIOUS_SITES = (
360 # invidious-redirect websites
361 r'(?:www\.)?redirect\.invidious\.io',
362 r'(?:(?:www|dev)\.)?invidio\.us',
363 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
364 r'(?:www\.)?invidious\.pussthecat\.org',
365 r'(?:www\.)?invidious\.048596\.xyz',
366 r'(?:www\.)?invidious\.zee\.li',
367 r'(?:www\.)?vid\.puffyan\.us',
368 r'(?:(?:www|au)\.)?ytprivate\.com',
369 r'(?:www\.)?invidious\.namazso\.eu',
370 r'(?:www\.)?invidious\.ethibox\.fr',
371 r'(?:www\.)?inv\.skyn3t\.in',
372 r'(?:www\.)?invidious\.himiko\.cloud',
373 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
374 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
375 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
376 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
377 # youtube-dl invidious instances list
378 r'(?:(?:www|no)\.)?invidiou\.sh',
379 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
380 r'(?:www\.)?invidious\.kabi\.tk',
381 r'(?:www\.)?invidious\.13ad\.de',
382 r'(?:www\.)?invidious\.mastodon\.host',
383 r'(?:www\.)?invidious\.zapashcanon\.fr',
384 r'(?:www\.)?invidious\.kavin\.rocks',
385 r'(?:www\.)?invidious\.tube',
386 r'(?:www\.)?invidiou\.site',
387 r'(?:www\.)?invidious\.site',
388 r'(?:www\.)?invidious\.xyz',
389 r'(?:www\.)?invidious\.nixnet\.xyz',
390 r'(?:www\.)?invidious\.drycat\.fr',
391 r'(?:www\.)?tube\.poal\.co',
392 r'(?:www\.)?tube\.connect\.cafe',
393 r'(?:www\.)?vid\.wxzm\.sx',
394 r'(?:www\.)?vid\.mint\.lgbt',
395 r'(?:www\.)?yewtu\.be',
396 r'(?:www\.)?yt\.elukerio\.org',
397 r'(?:www\.)?yt\.lelux\.fi',
398 r'(?:www\.)?invidious\.ggc-project\.de',
399 r'(?:www\.)?yt\.maisputain\.ovh',
400 r'(?:www\.)?invidious\.toot\.koeln',
401 r'(?:www\.)?invidious\.fdn\.fr',
402 r'(?:www\.)?watch\.nettohikari\.com',
403 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
404 r'(?:www\.)?qklhadlycap4cnod\.onion',
405 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
406 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
407 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
408 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
409 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
410 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
411 )
cb7dfeea 412 _VALID_URL = r"""(?x)^
c5e8d7af 413 (
edb53e2d 414 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 415 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
416 (?:www\.)?deturl\.com/www\.youtube\.com|
417 (?:www\.)?pwnyoutube\.com|
418 (?:www\.)?hooktube\.com|
419 (?:www\.)?yourepeat\.com|
420 tube\.majestyc\.net|
421 %(invidious)s|
422 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
423 (?:.*?\#/)? # handle anchor (#/) redirect urls
424 (?: # the various things that can precede the ID:
ac7553d0 425 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 426 |(?: # or the v= param in all its forms
f7000f3a 427 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 428 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 429 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
430 v=
431 )
f4b05232 432 ))
cbaed4bb
S
433 |(?:
434 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
435 vid\.plus| # or vid.plus/xxxx
436 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 437 %(invidious)s
cbaed4bb 438 )/
edb53e2d 439 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 440 )
c5e8d7af 441 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 442 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
443 (?!.*?\blist=
444 (?:
445 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
446 WL # WL are handled by the watch later IE
447 )
448 )
c5e8d7af 449 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 450 $""" % {
451 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
452 'invidious': '|'.join(_INVIDIOUS_SITES),
453 }
e40c758c 454 _PLAYER_INFO_RE = (
cc2db878 455 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
456 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 457 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 458 )
2c62dc26 459 _formats = {
c2d3cb4c 460 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
462 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
463 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
464 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
465 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
466 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 468 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 469 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
470 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
472 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
474 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 475 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 476 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
477 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 478
479
480 # 3D videos
c2d3cb4c 481 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
483 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
484 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 485 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
486 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
487 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 488
96fb5605 489 # Apple HTTP Live Streaming
11f12195 490 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 491 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
492 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
494 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
495 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 496 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
497 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
498
499 # DASH mp4 video
d23028a8
S
500 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
504 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 505 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
506 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
509 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
511 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 512
f6f1fc92 513 # Dash mp4 audio
d23028a8
S
514 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
515 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
516 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
517 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
519 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
520 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
521
522 # Dash webm
d23028a8
S
523 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
529 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
530 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
537 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 538 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
539 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
540 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
543 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
544 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
545
546 # Dash webm audio
d23028a8
S
547 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
548 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 549
0857baad 550 # Dash webm audio with opus inside
d23028a8
S
551 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
552 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
553 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 554
ce6b9a2d
PH
555 # RTMP (unnamed)
556 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
557
558 # av01 video only formats sometimes served with "unknown" codecs
559 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
562 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 563 }
29f7c58a 564 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 565
fd5c4aab
S
566 _GEO_BYPASS = False
567
78caa52a 568 IE_NAME = 'youtube'
2eb88d95
PH
569 _TESTS = [
570 {
2d3d2997 571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
3867038a 575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
579 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
580 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 581 'upload_date': '20121002',
3867038a 582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 583 'categories': ['Science & Technology'],
3867038a 584 'tags': ['youtube-dl'],
556dbe7f 585 'duration': 10,
dbdaaa23 586 'view_count': int,
3e7c1224
PH
587 'like_count': int,
588 'dislike_count': int,
7c80519c 589 'start_time': 1,
297a564b 590 'end_time': 9,
2eb88d95 591 }
0e853ca4 592 },
fccd3771 593 {
4bc3a23e
PH
594 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
595 'note': 'Embed-only video (#1746)',
596 'info_dict': {
597 'id': 'yZIXLfi8CZQ',
598 'ext': 'mp4',
599 'upload_date': '20120608',
600 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
601 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
602 'uploader': 'SET India',
94bfcd23 603 'uploader_id': 'setindia',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 605 'age_limit': 18,
545cc85d 606 },
607 'skip': 'Private video',
fccd3771 608 },
11b56058 609 {
8bdd16b4 610 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
611 'note': 'Use the first video ID in the URL',
612 'info_dict': {
613 'id': 'BaW_jenozKc',
614 'ext': 'mp4',
3867038a 615 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
616 'uploader': 'Philipp Hagemeister',
617 'uploader_id': 'phihag',
ec85ded8 618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 619 'upload_date': '20121002',
3867038a 620 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 621 'categories': ['Science & Technology'],
3867038a 622 'tags': ['youtube-dl'],
556dbe7f 623 'duration': 10,
dbdaaa23 624 'view_count': int,
11b56058
PM
625 'like_count': int,
626 'dislike_count': int,
34a7de29
S
627 },
628 'params': {
629 'skip_download': True,
630 },
11b56058 631 },
dd27fd17 632 {
2d3d2997 633 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
634 'note': '256k DASH audio (format 141) via DASH manifest',
635 'info_dict': {
636 'id': 'a9LDPn-MO4I',
637 'ext': 'm4a',
638 'upload_date': '20121002',
639 'uploader_id': '8KVIDEO',
ec85ded8 640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
641 'description': '',
642 'uploader': '8KVIDEO',
643 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 644 },
4bc3a23e
PH
645 'params': {
646 'youtube_include_dash_manifest': True,
647 'format': '141',
4919603f 648 },
de3c7fe0 649 'skip': 'format 141 not served anymore',
dd27fd17 650 },
8bdd16b4 651 # DASH manifest with encrypted signature
652 {
653 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
654 'info_dict': {
655 'id': 'IB3lcPjvWLA',
656 'ext': 'm4a',
657 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
658 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
659 'duration': 244,
660 'uploader': 'AfrojackVEVO',
661 'uploader_id': 'AfrojackVEVO',
662 'upload_date': '20131011',
cc2db878 663 'abr': 129.495,
8bdd16b4 664 },
665 'params': {
666 'youtube_include_dash_manifest': True,
667 'format': '141/bestaudio[ext=m4a]',
668 },
669 },
aa79ac0c
PH
670 # Controversy video
671 {
672 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
673 'info_dict': {
674 'id': 'T4XJQO3qol8',
675 'ext': 'mp4',
556dbe7f 676 'duration': 219,
aa79ac0c 677 'upload_date': '20100909',
4fe54c12 678 'uploader': 'Amazing Atheist',
aa79ac0c 679 'uploader_id': 'TheAmazingAtheist',
ec85ded8 680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 681 'title': 'Burning Everyone\'s Koran',
545cc85d 682 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 683 }
c522adb1 684 },
dd2d55f1 685 # Normal age-gate video (embed allowed)
c522adb1 686 {
2d3d2997 687 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
688 'info_dict': {
689 'id': 'HtVdAasjOgU',
690 'ext': 'mp4',
691 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 692 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 693 'duration': 142,
c522adb1
JMF
694 'uploader': 'The Witcher',
695 'uploader_id': 'WitcherGame',
ec85ded8 696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 697 'upload_date': '20140605',
34952f09 698 'age_limit': 18,
c522adb1
JMF
699 },
700 },
8bdd16b4 701 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
702 # YouTube Red ad is not captured for creator
703 {
704 'url': '__2ABJjxzNo',
705 'info_dict': {
706 'id': '__2ABJjxzNo',
707 'ext': 'mp4',
708 'duration': 266,
709 'upload_date': '20100430',
710 'uploader_id': 'deadmau5',
711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 712 'creator': 'deadmau5',
713 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 714 'uploader': 'deadmau5',
715 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 716 'alt_title': 'Some Chords',
8bdd16b4 717 },
718 'expected_warnings': [
719 'DASH manifest missing',
720 ]
721 },
067aa17e 722 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
723 {
724 'url': 'lqQg6PlCWgI',
725 'info_dict': {
726 'id': 'lqQg6PlCWgI',
727 'ext': 'mp4',
556dbe7f 728 'duration': 6085,
90227264 729 'upload_date': '20150827',
cbe2bd91 730 'uploader_id': 'olympic',
ec85ded8 731 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 732 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 733 'uploader': 'Olympic',
cbe2bd91
PH
734 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
735 },
736 'params': {
737 'skip_download': 'requires avconv',
e52a40ab 738 }
cbe2bd91 739 },
6271f1ca
PH
740 # Non-square pixels
741 {
742 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
743 'info_dict': {
744 'id': '_b-2C3KPAM0',
745 'ext': 'mp4',
746 'stretched_ratio': 16 / 9.,
556dbe7f 747 'duration': 85,
6271f1ca
PH
748 'upload_date': '20110310',
749 'uploader_id': 'AllenMeow',
ec85ded8 750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 751 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 752 'uploader': '孫ᄋᄅ',
6271f1ca
PH
753 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
754 },
06b491eb
S
755 },
756 # url_encoded_fmt_stream_map is empty string
757 {
758 'url': 'qEJwOuvDf7I',
759 'info_dict': {
760 'id': 'qEJwOuvDf7I',
f57b7835 761 'ext': 'webm',
06b491eb
S
762 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
763 'description': '',
764 'upload_date': '20150404',
765 'uploader_id': 'spbelect',
766 'uploader': 'Наблюдатели Петербурга',
767 },
768 'params': {
769 'skip_download': 'requires avconv',
e323cf3f
S
770 },
771 'skip': 'This live event has ended.',
06b491eb 772 },
067aa17e 773 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
774 {
775 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
776 'info_dict': {
777 'id': 'FIl7x6_3R5Y',
eb6793ba 778 'ext': 'webm',
da77d856
S
779 'title': 'md5:7b81415841e02ecd4313668cde88737a',
780 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 781 'duration': 220,
da77d856
S
782 'upload_date': '20150625',
783 'uploader_id': 'dorappi2000',
ec85ded8 784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 785 'uploader': 'dorappi2000',
eb6793ba 786 'formats': 'mincount:31',
da77d856 787 },
eb6793ba 788 'skip': 'not actual anymore',
2ee8f5d8 789 },
8a1a26ce
YCH
790 # DASH manifest with segment_list
791 {
792 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
793 'md5': '8ce563a1d667b599d21064e982ab9e31',
794 'info_dict': {
795 'id': 'CsmdDsKjzN8',
796 'ext': 'mp4',
17ee98e1 797 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
798 'uploader': 'Airtek',
799 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
800 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
801 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
802 },
803 'params': {
804 'youtube_include_dash_manifest': True,
805 'format': '135', # bestvideo
be49068d
S
806 },
807 'skip': 'This live event has ended.',
2ee8f5d8 808 },
cf7e015f
S
809 {
810 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 811 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 812 'info_dict': {
545cc85d 813 'id': 'jvGDaLqkpTg',
814 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
815 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
816 },
817 'playlist': [{
818 'info_dict': {
545cc85d 819 'id': 'jvGDaLqkpTg',
cf7e015f 820 'ext': 'mp4',
545cc85d 821 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
822 'description': 'md5:e03b909557865076822aa169218d6a5d',
823 'duration': 10643,
824 'upload_date': '20161111',
825 'uploader': 'Team PGP',
826 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
828 },
829 }, {
830 'info_dict': {
545cc85d 831 'id': '3AKt1R1aDnw',
cf7e015f 832 'ext': 'mp4',
545cc85d 833 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
834 'description': 'md5:e03b909557865076822aa169218d6a5d',
835 'duration': 10991,
836 'upload_date': '20161111',
837 'uploader': 'Team PGP',
838 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
839 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
840 },
841 }, {
842 'info_dict': {
545cc85d 843 'id': 'RtAMM00gpVc',
cf7e015f 844 'ext': 'mp4',
545cc85d 845 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
846 'description': 'md5:e03b909557865076822aa169218d6a5d',
847 'duration': 10995,
848 'upload_date': '20161111',
849 'uploader': 'Team PGP',
850 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
852 },
853 }, {
854 'info_dict': {
545cc85d 855 'id': '6N2fdlP3C5U',
cf7e015f 856 'ext': 'mp4',
545cc85d 857 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
858 'description': 'md5:e03b909557865076822aa169218d6a5d',
859 'duration': 10990,
860 'upload_date': '20161111',
861 'uploader': 'Team PGP',
862 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
864 },
865 }],
866 'params': {
867 'skip_download': True,
868 },
cbaed4bb 869 },
f9f49d87 870 {
067aa17e 871 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
872 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
873 'info_dict': {
874 'id': 'gVfLd0zydlo',
875 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
876 },
877 'playlist_count': 2,
be49068d 878 'skip': 'Not multifeed anymore',
f9f49d87 879 },
cbaed4bb 880 {
2d3d2997 881 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 882 'only_matching': True,
0e49d9a6 883 },
6d4fc66b 884 {
2d3d2997 885 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
886 'only_matching': True,
887 },
0e49d9a6 888 {
067aa17e 889 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 890 # Also tests cut-off URL expansion in video description (see
067aa17e
S
891 # https://github.com/ytdl-org/youtube-dl/issues/1892,
892 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
893 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
894 'info_dict': {
895 'id': 'lsguqyKfVQg',
896 'ext': 'mp4',
897 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 898 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 899 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 900 'duration': 133,
0e49d9a6
LL
901 'upload_date': '20151119',
902 'uploader_id': 'IronSoulElf',
ec85ded8 903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 904 'uploader': 'IronSoulElf',
eb6793ba
S
905 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
906 'track': 'Dark Walk - Position Music',
907 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 908 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
909 },
910 'params': {
911 'skip_download': True,
912 },
913 },
61f92af1 914 {
067aa17e 915 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
916 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
917 'only_matching': True,
918 },
313dfc45
LL
919 {
920 # Video with yt:stretch=17:0
921 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
922 'info_dict': {
923 'id': 'Q39EVAstoRM',
924 'ext': 'mp4',
925 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
926 'description': 'md5:ee18a25c350637c8faff806845bddee9',
927 'upload_date': '20151107',
928 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
929 'uploader': 'CH GAMER DROID',
930 },
931 'params': {
932 'skip_download': True,
933 },
be49068d 934 'skip': 'This video does not exist.',
313dfc45 935 },
7caf9830
S
936 {
937 # Video licensed under Creative Commons
938 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
939 'info_dict': {
940 'id': 'M4gD1WSo5mA',
941 'ext': 'mp4',
942 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
943 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 944 'duration': 721,
7caf9830
S
945 'upload_date': '20150127',
946 'uploader_id': 'BerkmanCenter',
ec85ded8 947 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 948 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
949 'license': 'Creative Commons Attribution license (reuse allowed)',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 },
fd050249
S
955 {
956 # Channel-like uploader_url
957 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
958 'info_dict': {
959 'id': 'eQcmzGIKrzg',
960 'ext': 'mp4',
961 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 962 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 963 'duration': 4060,
fd050249 964 'upload_date': '20151119',
eb6793ba 965 'uploader': 'Bernie Sanders',
fd050249 966 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
968 'license': 'Creative Commons Attribution license (reuse allowed)',
969 },
970 'params': {
971 'skip_download': True,
972 },
973 },
040ac686
S
974 {
975 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
976 'only_matching': True,
7f29cf54
S
977 },
978 {
067aa17e 979 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
980 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
981 'only_matching': True,
6496ccb4
S
982 },
983 {
984 # Rental video preview
985 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
986 'info_dict': {
987 'id': 'uGpuVWrhIzE',
988 'ext': 'mp4',
989 'title': 'Piku - Trailer',
990 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
991 'upload_date': '20150811',
992 'uploader': 'FlixMatrix',
993 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
995 'license': 'Standard YouTube License',
996 },
997 'params': {
998 'skip_download': True,
999 },
eb6793ba 1000 'skip': 'This video is not available.',
022a5d66 1001 },
12afdc2a
S
1002 {
1003 # YouTube Red video with episode data
1004 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1005 'info_dict': {
1006 'id': 'iqKdEhx-dD4',
1007 'ext': 'mp4',
1008 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1009 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1010 'duration': 2085,
12afdc2a
S
1011 'upload_date': '20170118',
1012 'uploader': 'Vsauce',
1013 'uploader_id': 'Vsauce',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1015 'series': 'Mind Field',
1016 'season_number': 1,
1017 'episode_number': 1,
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 'expected_warnings': [
1023 'Skipping DASH manifest',
1024 ],
1025 },
c7121fa7
S
1026 {
1027 # The following content has been identified by the YouTube community
1028 # as inappropriate or offensive to some audiences.
1029 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1030 'info_dict': {
1031 'id': '6SJNVb0GnPI',
1032 'ext': 'mp4',
1033 'title': 'Race Differences in Intelligence',
1034 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1035 'duration': 965,
1036 'upload_date': '20140124',
1037 'uploader': 'New Century Foundation',
1038 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1040 },
1041 'params': {
1042 'skip_download': True,
1043 },
545cc85d 1044 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1045 },
022a5d66
S
1046 {
1047 # itag 212
1048 'url': '1t24XAntNCY',
1049 'only_matching': True,
fd5c4aab
S
1050 },
1051 {
1052 # geo restricted to JP
1053 'url': 'sJL6WA-aGkQ',
1054 'only_matching': True,
1055 },
cd5a74a2
S
1056 {
1057 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1058 'only_matching': True,
1059 },
bc2ca1bb 1060 {
1061 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1062 'only_matching': True,
1063 },
1064 {
1065 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1066 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1067 'only_matching': True,
1068 },
825cd268
RA
1069 {
1070 # DRM protected
1071 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1072 'only_matching': True,
4fe54c12
S
1073 },
1074 {
1075 # Video with unsupported adaptive stream type formats
1076 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1077 'info_dict': {
1078 'id': 'Z4Vy8R84T1U',
1079 'ext': 'mp4',
1080 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1081 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1082 'duration': 433,
1083 'upload_date': '20130923',
1084 'uploader': 'Amelia Putri Harwita',
1085 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1087 'formats': 'maxcount:10',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 'youtube_include_dash_manifest': False,
1092 },
5429d6a9 1093 'skip': 'not actual anymore',
5caabd3c 1094 },
1095 {
822b9d9c 1096 # Youtube Music Auto-generated description
5caabd3c 1097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1098 'info_dict': {
1099 'id': 'MgNrAu2pzNs',
1100 'ext': 'mp4',
1101 'title': 'Voyeur Girl',
1102 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1103 'upload_date': '20190312',
5429d6a9
S
1104 'uploader': 'Stephen - Topic',
1105 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1106 'artist': 'Stephen',
1107 'track': 'Voyeur Girl',
1108 'album': 'it\'s too much love to know my dear',
1109 'release_date': '20190313',
1110 'release_year': 2019,
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
1115 },
66b48727
RA
1116 {
1117 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1118 'only_matching': True,
1119 },
011e75e6
S
1120 {
1121 # invalid -> valid video id redirection
1122 'url': 'DJztXj2GPfl',
1123 'info_dict': {
1124 'id': 'DJztXj2GPfk',
1125 'ext': 'mp4',
1126 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1127 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1128 'upload_date': '20090125',
1129 'uploader': 'Prochorowka',
1130 'uploader_id': 'Prochorowka',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1132 'artist': 'Panjabi MC',
1133 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1134 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1135 },
1136 'params': {
1137 'skip_download': True,
1138 },
545cc85d 1139 'skip': 'Video unavailable',
ea74e00b
DP
1140 },
1141 {
1142 # empty description results in an empty string
1143 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1144 'info_dict': {
1145 'id': 'x41yOUIvK2k',
1146 'ext': 'mp4',
1147 'title': 'IMG 3456',
1148 'description': '',
1149 'upload_date': '20170613',
1150 'uploader_id': 'ElevageOrVert',
1151 'uploader': 'ElevageOrVert',
1152 },
1153 'params': {
1154 'skip_download': True,
1155 },
1156 },
a0566bbf 1157 {
29f7c58a 1158 # with '};' inside yt initial data (see [1])
1159 # see [2] for an example with '};' inside ytInitialPlayerResponse
1160 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1161 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1162 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1163 'info_dict': {
1164 'id': 'CHqg6qOn4no',
1165 'ext': 'mp4',
1166 'title': 'Part 77 Sort a list of simple types in c#',
1167 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1168 'upload_date': '20130831',
1169 'uploader_id': 'kudvenkat',
1170 'uploader': 'kudvenkat',
1171 },
1172 'params': {
1173 'skip_download': True,
1174 },
1175 },
29f7c58a 1176 {
1177 # another example of '};' in ytInitialData
1178 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1179 'only_matching': True,
1180 },
1181 {
1182 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1183 'only_matching': True,
1184 },
545cc85d 1185 {
cc2db878 1186 # https://github.com/ytdl-org/youtube-dl/pull/28094
1187 'url': 'OtqTfy26tG0',
1188 'info_dict': {
1189 'id': 'OtqTfy26tG0',
1190 'ext': 'mp4',
1191 'title': 'Burn Out',
1192 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1193 'upload_date': '20141120',
1194 'uploader': 'The Cinematic Orchestra - Topic',
1195 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1196 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1197 'artist': 'The Cinematic Orchestra',
1198 'track': 'Burn Out',
1199 'album': 'Every Day',
1200 'release_data': None,
1201 'release_year': None,
1202 },
1203 'params': {
1204 'skip_download': True,
1205 },
545cc85d 1206 },
bc2ca1bb 1207 {
1208 # controversial video, only works with bpctr when authenticated with cookies
1209 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1210 'only_matching': True,
1211 },
2eb88d95
PH
1212 ]
1213
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, filled in by _decrypt_signature
    self._player_cache = {}
    # Downloaded player code, filled in by _extract_signature_function
    self._code_cache = {}
e0df6211 1218
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return the lengths of the dot-separated signature parts, joined by dots """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1222
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first _PLAYER_INFO_RE pattern found in player_url.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1232
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like example_sig.

    The filesystem cache ('youtube-sigfuncs') is consulted first; on a miss
    the player code is downloaded (once per player id), parsed, and the
    resulting index permutation is written back to the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Key of the filesystem cache entry
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The key is used as a file name, so it must not contain path parts
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        # A cached spec is a list of source indices: replay it directly
        return lambda s: ''.join(s[i] for i in stored_spec)

    try:
        code = self._code_cache[player_id]
    except KeyError:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(code)

    # Feed a string whose n-th character has code point n, so the output
    # reveals which input position ends up where
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return res
1259
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is probed with a string whose n-th character has code point n; the
    resulting index sequence is rendered as a sum of index/slice expressions
    on `s` and written out with self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' expressions covering idxs in order; runs of
        # indices that change by a constant +1/-1 are folded into one slice.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as a slice expression
            starts = '' if start == 0 else str(start)
            # The slice bound is exclusive, hence end + step; a negative
            # bound (descending run reaching index 0) is left open instead
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the difference is unchanged
                if i - prev == step:
                    continue
                # Run ended at prev; emit it and start over from i
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending for the last index.
        # NOTE(review): i is only bound if the loop ran at least once, i.e.
        # idxs has two or more elements - confirm shorter inputs cannot occur
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): the leading whitespace inside the second literal may have
    # been collapsed by the blame rendering - confirm against the raw file
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1298
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function found in jscode.

    The name of the function is located with the regex list below (patterns
    are tried in order, the 'sig' group holds the name), then the function
    is extracted with JSInterpreter.

    Returns a callable taking the signature string and returning the result
    of calling the extracted JS function with it as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy
         # could never match after the first had failed, so it was dropped
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1322
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    Raises ExtractorError when player_url is missing or when anything goes
    wrong while obtaining or running the signature function.
    """

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    try:
        # One function per player and per signature shape
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            func = self._player_cache[cache_key]
        else:
            func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = func
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(func, s)
        return func(s)
    except Exception as e:
        # Report the full traceback inside the error message
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1349
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL from player_response, if there is one.

    Does nothing when player_response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request itself is
    non-fatal: a failure does not abort extraction.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # 16 characters drawn uniformly from the alphabet. The previous
    # `random.randint(0, 256) & 63` had an inclusive upper bound, so 257
    # values were folded onto 64 slots and 'a' was slightly favoured.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # parse_qs values are lists, hence doseq=True when re-encoding
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1374
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect references to embedded YouTube videos found in webpage.

    Returns a list of strings. Entries from the player-embed pattern are
    (HTML-unescaped) URLs; entries from the lazyYT and "YouTube Video
    Importer" patterns are the raw attribute values (data-youtube-id /
    data-video_id).
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # parses as "optional '(' then a literal ':'" and therefore never
    # matched a call such as embedSWF("//www.youtube.com/v/...").

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_ref) for video_ref in
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1406
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1411
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1419
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Each chapter runs from its own start time to the start time of the
    following chapter; the last one ends at duration. Chapters whose start
    or end time cannot be determined are left out.

    Returns None when data holds no (non-empty) chapter list, otherwise a
    list of dicts with 'start_time', 'end_time' and 'title'.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']
                   ['playerBar']['chapteredPlayerBarRenderer']['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(entry):
        # Start offset is given in milliseconds; convert to seconds
        millis = try_get(
            entry, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    starts = [start_seconds(entry) for entry in raw_chapters]
    # End of chapter k is the start of chapter k + 1, or duration for the last
    ends = starts[1:] + [duration]

    chapters = []
    for entry, begin, finish in zip(raw_chapters, starts, ends):
        if begin is None or finish is None:
            continue
        chapters.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1459
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by regex in webpage and parse it (non-fatally).

    The variant of regex followed by _YT_INITIAL_BOUNDARY_RE is tried first,
    then regex alone; when neither matches, '{}' is parsed instead.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_text = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
84213ea8 1464
c5e8d7af 1465 def _real_extract(self, url):
cf7e015f 1466 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1467 video_id = self._match_id(url)
1468 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1469 webpage_url = base_url + 'watch?v=' + video_id
1470 webpage = self._download_webpage(
1471 webpage_url + '&has_verified=1&bpctr=9999999999',
1472 video_id, fatal=False)
545cc85d 1473
1474 player_response = None
1475 if webpage:
1476 player_response = self._extract_yt_initial_variable(
1477 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1478 video_id, 'initial player response')
1479 if not player_response:
1480 player_response = self._call_api(
1481 'player', {'videoId': video_id}, video_id)
1482
1483 playability_status = player_response.get('playabilityStatus') or {}
1484 if playability_status.get('reason') == 'Sign in to confirm your age':
1485 pr = self._parse_json(try_get(compat_parse_qs(
1486 self._download_webpage(
1487 base_url + 'get_video_info', video_id,
1488 'Refetching age-gated info webpage',
1489 'unable to download video info webpage', query={
1490 'video_id': video_id,
7c60c33e 1491 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1492 }, fatal=False)),
1493 lambda x: x['player_response'][0],
1494 compat_str) or '{}', video_id)
1495 if pr:
1496 player_response = pr
1497
1498 trailer_video_id = try_get(
1499 playability_status,
1500 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1501 compat_str)
1502 if trailer_video_id:
1503 return self.url_result(
1504 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1505
545cc85d 1506 def get_text(x):
1507 if not x:
c2d125d9 1508 return
545cc85d 1509 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1510
545cc85d 1511 search_meta = (
1512 lambda x: self._html_search_meta(x, webpage, default=None)) \
1513 if webpage else lambda x: None
dbdaaa23 1514
545cc85d 1515 video_details = player_response.get('videoDetails') or {}
37357d21 1516 microformat = try_get(
545cc85d 1517 player_response,
1518 lambda x: x['microformat']['playerMicroformatRenderer'],
1519 dict) or {}
1520 video_title = video_details.get('title') \
1521 or get_text(microformat.get('title')) \
1522 or search_meta(['og:title', 'twitter:title', 'title'])
1523 video_description = video_details.get('shortDescription')
cf7e015f 1524
8fe10494 1525 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1526 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1527 multifeed_metadata_list = try_get(
1528 player_response,
1529 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1530 compat_str)
8fe10494
S
1531 if multifeed_metadata_list:
1532 entries = []
1533 feed_ids = []
1534 for feed in multifeed_metadata_list.split(','):
1535 # Unquote should take place before split on comma (,) since textual
1536 # fields may contain comma as well (see
067aa17e 1537 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1538 feed_data = compat_parse_qs(
1539 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1540
def feed_entry(name):
    """Look up the first value of field *name* in the current feed.

    The lookup goes through try_get with compat_str as the expected type,
    so its result for a missing or mistyped field is returned unchanged.
    """
    def first_value(fields):
        # feed_data comes from compat_parse_qs; each field is indexed [0]
        return fields[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
1544
1545 feed_id = feed_entry('id')
1546 if not feed_id:
1547 continue
1548 feed_title = feed_entry('title')
1549 title = video_title
1550 if feed_title:
1551 title += ' (%s)' % feed_title
8fe10494
S
1552 entries.append({
1553 '_type': 'url_transparent',
1554 'ie_key': 'Youtube',
1555 'url': smuggle_url(
545cc85d 1556 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1557 {'force_singlefeed': True}),
6b09401b 1558 'title': title,
8fe10494 1559 })
6b09401b 1560 feed_ids.append(feed_id)
8fe10494
S
1561 self.to_screen(
1562 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1563 % (', '.join(feed_ids), video_id))
545cc85d 1564 return self.playlist_result(
1565 entries, video_id, video_title, video_description)
8fe10494
S
1566 else:
1567 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1568
545cc85d 1569 formats = []
1570 itags = []
cc2db878 1571 itag_qualities = {}
545cc85d 1572 player_url = None
dca3ff4a 1573 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1574 streaming_data = player_response.get('streamingData') or {}
1575 streaming_formats = streaming_data.get('formats') or []
1576 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1577 for fmt in streaming_formats:
1578 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1579 continue
321bf820 1580
cc2db878 1581 itag = str_or_none(fmt.get('itag'))
1582 quality = fmt.get('quality')
1583 if itag and quality:
1584 itag_qualities[itag] = quality
1585 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1586 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1587 # number of fragments that would subsequently be requested with (`&sq=N`)
1588 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1589 continue
1590
545cc85d 1591 fmt_url = fmt.get('url')
1592 if not fmt_url:
1593 sc = compat_parse_qs(fmt.get('signatureCipher'))
1594 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1595 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1596 if not (sc and fmt_url and encrypted_sig):
1597 continue
1598 if not player_url:
1599 if not webpage:
1600 continue
1601 player_url = self._search_regex(
1602 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1603 webpage, 'player URL', fatal=False)
1604 if not player_url:
201e9eaa 1605 continue
545cc85d 1606 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1607 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1608 fmt_url += '&' + sp + '=' + signature
1609
545cc85d 1610 if itag:
1611 itags.append(itag)
cc2db878 1612 tbr = float_or_none(
1613 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1614 dct = {
1615 'asr': int_or_none(fmt.get('audioSampleRate')),
1616 'filesize': int_or_none(fmt.get('contentLength')),
1617 'format_id': itag,
1618 'format_note': fmt.get('qualityLabel') or quality,
1619 'fps': int_or_none(fmt.get('fps')),
1620 'height': int_or_none(fmt.get('height')),
dca3ff4a 1621 'quality': q(quality),
cc2db878 1622 'tbr': tbr,
545cc85d 1623 'url': fmt_url,
1624 'width': fmt.get('width'),
1625 }
1626 mimetype = fmt.get('mimeType')
1627 if mimetype:
1628 mobj = re.match(
1629 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1630 if mobj:
1631 dct['ext'] = mimetype2ext(mobj.group(1))
1632 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1633 no_audio = dct.get('acodec') == 'none'
1634 no_video = dct.get('vcodec') == 'none'
1635 if no_audio:
1636 dct['vbr'] = tbr
1637 if no_video:
1638 dct['abr'] = tbr
1639 if no_audio or no_video:
545cc85d 1640 dct['downloader_options'] = {
1641 # Youtube throttles chunks >~10M
1642 'http_chunk_size': 10485760,
bf1317d2 1643 }
7c60c33e 1644 if dct.get('ext'):
1645 dct['container'] = dct['ext'] + '_dash'
545cc85d 1646 formats.append(dct)
1647
1648 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1649 if hls_manifest_url:
1650 for f in self._extract_m3u8_formats(
1651 hls_manifest_url, video_id, 'mp4', fatal=False):
1652 itag = self._search_regex(
1653 r'/itag/(\d+)', f['url'], 'itag', default=None)
1654 if itag:
1655 f['format_id'] = itag
1656 formats.append(f)
1657
1658 if self._downloader.params.get('youtube_include_dash_manifest'):
1659 dash_manifest_url = streaming_data.get('dashManifestUrl')
1660 if dash_manifest_url:
545cc85d 1661 for f in self._extract_mpd_formats(
1662 dash_manifest_url, video_id, fatal=False):
cc2db878 1663 itag = f['format_id']
1664 if itag in itags:
1665 continue
dca3ff4a 1666 if itag in itag_qualities:
1667 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1668 # but kept to maintain feature parity (and code similarity) with youtube-dl
1669 # Remove if this causes any issues with sorting in future
1670 f['quality'] = q(itag_qualities[itag])
545cc85d 1671 filesize = int_or_none(self._search_regex(
1672 r'/clen/(\d+)', f.get('fragment_base_url')
1673 or f['url'], 'file size', default=None))
1674 if filesize:
1675 f['filesize'] = filesize
cc2db878 1676 formats.append(f)
bf1317d2 1677
545cc85d 1678 if not formats:
63ad4d43 1679 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1680 raise ExtractorError(
1681 'This video is DRM protected.', expected=True)
1682 pemr = try_get(
1683 playability_status,
1684 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1685 dict) or {}
1686 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1687 subreason = pemr.get('subreason')
1688 if subreason:
1689 subreason = clean_html(get_text(subreason))
1690 if subreason == 'The uploader has not made this video available in your country.':
1691 countries = microformat.get('availableCountries')
1692 if not countries:
1693 regions_allowed = search_meta('regionsAllowed')
1694 countries = regions_allowed.split(',') if regions_allowed else None
1695 self.raise_geo_restricted(
1696 subreason, countries)
1697 reason += '\n' + subreason
1698 if reason:
1699 raise ExtractorError(reason, expected=True)
bf1317d2 1700
545cc85d 1701 self._sort_formats(formats)
bf1317d2 1702
545cc85d 1703 keywords = video_details.get('keywords') or []
1704 if not keywords and webpage:
1705 keywords = [
1706 unescapeHTML(m.group('content'))
1707 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1708 for keyword in keywords:
1709 if keyword.startswith('yt:stretch='):
1710 w, h = keyword.split('=')[1].split(':')
1711 w, h = int(w), int(h)
1712 if w > 0 and h > 0:
1713 ratio = w / h
1714 for f in formats:
1715 if f.get('vcodec') != 'none':
1716 f['stretched_ratio'] = ratio
6449cd80 1717
545cc85d 1718 thumbnails = []
1719 for container in (video_details, microformat):
1720 for thumbnail in (try_get(
1721 container,
1722 lambda x: x['thumbnail']['thumbnails'], list) or []):
1723 thumbnail_url = thumbnail.get('url')
1724 if not thumbnail_url:
bf1317d2 1725 continue
545cc85d 1726 thumbnails.append({
1727 'height': int_or_none(thumbnail.get('height')),
1728 'url': thumbnail_url,
1729 'width': int_or_none(thumbnail.get('width')),
1730 })
1731 if thumbnails:
1732 break
a6211d23 1733 else:
545cc85d 1734 thumbnail = search_meta(['og:image', 'twitter:image'])
1735 if thumbnail:
1736 thumbnails = [{'url': thumbnail}]
1737
1738 category = microformat.get('category') or search_meta('genre')
1739 channel_id = video_details.get('channelId') \
1740 or microformat.get('externalChannelId') \
1741 or search_meta('channelId')
1742 duration = int_or_none(
1743 video_details.get('lengthSeconds')
1744 or microformat.get('lengthSeconds')) \
1745 or parse_duration(search_meta('duration'))
1746 is_live = video_details.get('isLive')
1747 owner_profile_url = microformat.get('ownerProfileUrl')
1748
1749 info = {
1750 'id': video_id,
1751 'title': self._live_title(video_title) if is_live else video_title,
1752 'formats': formats,
1753 'thumbnails': thumbnails,
1754 'description': video_description,
1755 'upload_date': unified_strdate(
1756 microformat.get('uploadDate')
1757 or search_meta('uploadDate')),
1758 'uploader': video_details['author'],
1759 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1760 'uploader_url': owner_profile_url,
1761 'channel_id': channel_id,
1762 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1763 'duration': duration,
1764 'view_count': int_or_none(
1765 video_details.get('viewCount')
1766 or microformat.get('viewCount')
1767 or search_meta('interactionCount')),
1768 'average_rating': float_or_none(video_details.get('averageRating')),
1769 'age_limit': 18 if (
1770 microformat.get('isFamilySafe') is False
1771 or search_meta('isFamilyFriendly') == 'false'
1772 or search_meta('og:restrictions:age') == '18+') else 0,
1773 'webpage_url': webpage_url,
1774 'categories': [category] if category else None,
1775 'tags': keywords,
1776 'is_live': is_live,
1777 'playable_in_embed': playability_status.get('playableInEmbed'),
f76ede8e 1778 'was_live': video_details.get('isLiveContent')
545cc85d 1779 }
b477fc13 1780
545cc85d 1781 pctr = try_get(
1782 player_response,
1783 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1784 subtitles = {}
1785 if pctr:
def process_language(container, base_url, lang_code, query):
    """Register the subtitle variants of one language in *container*.

    For every format listed in self._SUBTITLE_FORMATS an entry of the form
    {'ext': <format>, 'url': <url>} is built, where the URL is *base_url*
    passed through update_url_query with *query* plus 'fmt': <format>.
    The list of entries is stored as container[lang_code], replacing any
    previous value for that language.

    *query* is copied for each format, so the dict supplied by the caller
    is never modified.
    """
    lang_subs = []
    for fmt in self._SUBTITLE_FORMATS:
        # Work on a private copy: updating *query* in place would leak the
        # 'fmt' key back into the caller's dict.
        fmt_query = dict(query)
        fmt_query['fmt'] = fmt
        lang_subs.append({
            'ext': fmt,
            'url': update_url_query(base_url, fmt_query),
        })
    container[lang_code] = lang_subs
7e72694b 1797
545cc85d 1798 for caption_track in (pctr.get('captionTracks') or []):
1799 base_url = caption_track.get('baseUrl')
1800 if not base_url:
1801 continue
1802 if caption_track.get('kind') != 'asr':
1803 lang_code = caption_track.get('languageCode')
1804 if not lang_code:
1805 continue
1806 process_language(
1807 subtitles, base_url, lang_code, {})
1808 continue
1809 automatic_captions = {}
1810 for translation_language in (pctr.get('translationLanguages') or []):
1811 translation_language_code = translation_language.get('languageCode')
1812 if not translation_language_code:
1813 continue
1814 process_language(
1815 automatic_captions, base_url, translation_language_code,
1816 {'tlang': translation_language_code})
1817 info['automatic_captions'] = automatic_captions
1818 info['subtitles'] = subtitles
7e72694b 1819
545cc85d 1820 parsed_url = compat_urllib_parse_urlparse(url)
1821 for component in [parsed_url.fragment, parsed_url.query]:
1822 query = compat_parse_qs(component)
1823 for k, v in query.items():
1824 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1825 d_k += '_time'
1826 if d_k not in info and k in s_ks:
1827 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1828
1829 # Youtube Music Auto-generated description
822b9d9c 1830 if video_description:
38d70284 1831 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1832 if mobj:
822b9d9c
RA
1833 release_year = mobj.group('release_year')
1834 release_date = mobj.group('release_date')
1835 if release_date:
1836 release_date = release_date.replace('-', '')
1837 if not release_year:
545cc85d 1838 release_year = release_date[:4]
1839 info.update({
1840 'album': mobj.group('album'.strip()),
1841 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1842 'track': mobj.group('track').strip(),
1843 'release_date': release_date,
cc2db878 1844 'release_year': int_or_none(release_year),
545cc85d 1845 })
7e72694b 1846
545cc85d 1847 initial_data = None
1848 if webpage:
1849 initial_data = self._extract_yt_initial_variable(
1850 webpage, self._YT_INITIAL_DATA_RE, video_id,
1851 'yt initial data')
1852 if not initial_data:
1853 initial_data = self._call_api(
1854 'next', {'videoId': video_id}, video_id, fatal=False)
1855
1856 if not is_live:
1857 try:
1858 # This will error if there is no livechat
1859 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1860 info['subtitles']['live_chat'] = [{
1861 'video_id': video_id,
1862 'ext': 'json',
1863 'protocol': 'youtube_live_chat_replay',
1864 }]
1865 except (KeyError, IndexError, TypeError):
1866 pass
1867
1868 if initial_data:
1869 chapters = self._extract_chapters_from_json(
1870 initial_data, video_id, duration)
1871 if not chapters:
1872 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1873 contents = try_get(
1874 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1875 list)
1876 if not contents:
1877 continue
1878
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a chapter marker renderer.

    *mmlir* is a macroMarkersListItemRenderer mapping, or None when the
    neighbouring list item has no such renderer (the caller obtains it via
    try_get, which may yield None). The text of its 'timeDescription' is
    extracted with get_text and handed to parse_duration; that result is
    returned as is.
    """
    # Treat a missing renderer like an empty one instead of failing with
    # AttributeError on None.get(...).
    time_description = (mmlir or {}).get('timeDescription')
    return parse_duration(get_text(time_description))
1882
1883 chapters = []
1884 for next_num, content in enumerate(contents, start=1):
1885 mmlir = content.get('macroMarkersListItemRenderer') or {}
1886 start_time = chapter_time(mmlir)
1887 end_time = chapter_time(try_get(
1888 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1889 if next_num < len(contents) else duration
1890 if start_time is None or end_time is None:
1891 continue
1892 chapters.append({
1893 'start_time': start_time,
1894 'end_time': end_time,
1895 'title': get_text(mmlir.get('title')),
1896 })
1897 if chapters:
1898 break
1899 if chapters:
1900 info['chapters'] = chapters
1901
1902 contents = try_get(
1903 initial_data,
1904 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1905 list) or []
1906 for content in contents:
1907 vpir = content.get('videoPrimaryInfoRenderer')
1908 if vpir:
1909 stl = vpir.get('superTitleLink')
1910 if stl:
1911 stl = get_text(stl)
1912 if try_get(
1913 vpir,
1914 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1915 info['location'] = stl
1916 else:
1917 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1918 if mobj:
1919 info.update({
1920 'series': mobj.group(1),
1921 'season_number': int(mobj.group(2)),
1922 'episode_number': int(mobj.group(3)),
1923 })
1924 for tlb in (try_get(
1925 vpir,
1926 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1927 list) or []):
1928 tbr = tlb.get('toggleButtonRenderer') or {}
1929 for getter, regex in [(
1930 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1931 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1932 lambda x: x['accessibility'],
1933 lambda x: x['accessibilityData']['accessibilityData'],
1934 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1935 label = (try_get(tbr, getter, dict) or {}).get('label')
1936 if label:
1937 mobj = re.match(regex, label)
1938 if mobj:
1939 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1940 break
1941 sbr_tooltip = try_get(
1942 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1943 if sbr_tooltip:
1944 like_count, dislike_count = sbr_tooltip.split(' / ')
1945 info.update({
1946 'like_count': str_to_int(like_count),
1947 'dislike_count': str_to_int(dislike_count),
1948 })
1949 vsir = content.get('videoSecondaryInfoRenderer')
1950 if vsir:
1951 info['channel'] = get_text(try_get(
1952 vsir,
1953 lambda x: x['owner']['videoOwnerRenderer']['title'],
1954 compat_str))
1955 rows = try_get(
1956 vsir,
1957 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1958 list) or []
1959 multiple_songs = False
1960 for row in rows:
1961 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1962 multiple_songs = True
1963 break
1964 for row in rows:
1965 mrr = row.get('metadataRowRenderer') or {}
1966 mrr_title = mrr.get('title')
1967 if not mrr_title:
1968 continue
1969 mrr_title = get_text(mrr['title'])
1970 mrr_contents_text = get_text(mrr['contents'][0])
1971 if mrr_title == 'License':
1972 info['license'] = mrr_contents_text
1973 elif not multiple_songs:
1974 if mrr_title == 'Album':
1975 info['album'] = mrr_contents_text
1976 elif mrr_title == 'Artist':
1977 info['artist'] = mrr_contents_text
1978 elif mrr_title == 'Song':
1979 info['track'] = mrr_contents_text
1980
1981 fallbacks = {
1982 'channel': 'uploader',
1983 'channel_id': 'uploader_id',
1984 'channel_url': 'uploader_url',
1985 }
1986 for to, frm in fallbacks.items():
1987 if not info.get(to):
1988 info[to] = info.get(frm)
1989
1990 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1991 v = info.get(s_k)
1992 if v:
1993 info[d_k] = v
b84071c0 1994
06167fbb 1995 # get xsrf for annotations or comments
1996 get_annotations = self._downloader.params.get('writeannotations', False)
1997 get_comments = self._downloader.params.get('getcomments', False)
1998 if get_annotations or get_comments:
29f7c58a 1999 xsrf_token = None
545cc85d 2000 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2001 if ytcfg:
2002 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2003 if not xsrf_token:
2004 xsrf_token = self._search_regex(
2005 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2006 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2007
2008 # annotations
06167fbb 2009 if get_annotations:
64b6a4e9
RA
2010 invideo_url = try_get(
2011 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2012 if xsrf_token and invideo_url:
29f7c58a 2013 xsrf_field_name = None
2014 if ytcfg:
2015 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2016 if not xsrf_field_name:
2017 xsrf_field_name = self._search_regex(
2018 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2019 webpage, 'xsrf field name',
29f7c58a 2020 group='xsrf_field_name', default='session_token')
8a784c74 2021 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2022 self._proto_relative_url(invideo_url),
2023 video_id, note='Downloading annotations',
2024 errnote='Unable to download video annotations', fatal=False,
2025 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2026
06167fbb 2027 # Get comments
2028 # TODO: Refactor and move to separate function
277d6ff5 2029 def extract_comments():
06167fbb 2030 expected_video_comment_count = 0
2031 video_comments = []
277d6ff5 2032 comment_xsrf = xsrf_token
06167fbb 2033
def find_value(html, key, num_chars=2, separator='"'):
    """Extract the value that follows *key* in *html*.

    The value starts *num_chars* characters after the end of the first
    occurrence of *key* and runs up to (not including) the next occurrence
    of *separator*.

    Returns the extracted substring, or None when *key* does not occur in
    *html* or no *separator* follows the start of the value.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find signals "not found" with -1; using it in the offset
        # arithmetic would silently select an unrelated slice.
        return None
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # Unterminated value: slicing to -1 would drop the last character
        # and return everything up to the end of the document.
        return None
    return html[pos_begin:pos_end]
2038
def search_dict(partial, key):
    """Lazily yield every value stored under *key* inside *partial*.

    *partial* is walked depth-first through nested dicts and lists, in
    iteration order. A value found under *key* is yielded as is and not
    searched any further; anything that is neither a dict nor a list is
    ignored.
    """
    unnamed = object()  # marks list members, which have no key of their own

    def children(node):
        # (name, value) pairs of a container; nothing for any other object
        if isinstance(node, dict):
            return iter(node.items())
        if isinstance(node, list):
            return ((unnamed, member) for member in node)
        return iter(())

    # Stack of partially consumed child iterators, innermost last
    pending = [children(partial)]
    while pending:
        for name, value in pending[-1]:
            if name is not unnamed and name == key:
                yield value
                continue
            # Descend; the parent iterator resumes once the child is done
            pending.append(children(value))
            break
        else:
            pending.pop()
2051
8a784c74 2052 continuations = []
2053 if initial_data:
2054 try:
2055 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2056 continuations = [ncd['continuation']]
2057 # Handle videos where comments have been disabled entirely
2058 except StopIteration:
2059 pass
06167fbb 2060
def get_continuation(continuation, session_token, replies=False):
    """Request one page of comments, or of comment replies, from YouTube.

    *continuation* is sent as the 'ctoken' query parameter and
    *session_token* as the POST field 'session_token'. With *replies* set,
    the 'action_get_comment_replies' action is requested instead of
    'action_get_comments'.

    Returns the parsed JSON body for HTTP 200 and None for HTTP 413 (which
    is passed to the downloader as an expected status). Any other status
    code raises ExtractorError.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    request_query = {
        'pbj': 1,
        'ctoken': continuation,
        action: 1,
    }
    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01',
    }
    post_body = urlencode_postdata({'session_token': session_token})

    page, url_handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=post_body,
        query=request_query,
        headers=request_headers)

    status = url_handle.getcode()
    if status == 413:
        return None
    if status != 200:
        raise ExtractorError('Unexpected HTTP error code: %s' % status)
    return self._parse_json(page, video_id)
2095
2096 first_continuation = True
885d36d4 2097 chain_msg = ''
2098 self.to_screen('Downloading comments')
06167fbb 2099 while continuations:
885d36d4 2100 continuation = continuations.pop()
277d6ff5 2101 comment_response = get_continuation(continuation, comment_xsrf)
06167fbb 2102 if not comment_response:
2103 continue
2104 if list(search_dict(comment_response, 'externalErrorMessage')):
2105 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2106
8d0ea5f9
B
2107 if 'continuationContents' not in comment_response['response']:
2108 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2109 continue
2110 # not sure if this actually helps
2111 if 'xsrf_token' in comment_response:
277d6ff5 2112 comment_xsrf = comment_response['xsrf_token']
8d0ea5f9 2113
06167fbb 2114 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2115 if first_continuation:
2116 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2117 first_continuation = False
2118 if 'contents' not in item_section:
2119 # continuation returned no comments?
2120 # set an empty array as to not break the for loop
2121 item_section['contents'] = []
2122
2123 for meta_comment in item_section['contents']:
2124 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2125 video_comments.append({
2126 'id': comment['commentId'],
ba7bf12d 2127 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2128 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2129 'author': comment.get('authorText', {}).get('simpleText', ''),
2130 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2131 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2132 'parent': 'root'
2133 })
2134 if 'replies' not in meta_comment['commentThreadRenderer']:
2135 continue
2136
8d0ea5f9
B
2137 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2138 while reply_continuations:
06167fbb 2139 time.sleep(1)
8d0ea5f9 2140 continuation = reply_continuations.pop()
277d6ff5 2141 replies_data = get_continuation(continuation, comment_xsrf, True)
06167fbb 2142 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2143 continue
06167fbb 2144
2145 if self._downloader.params.get('verbose', False):
885d36d4 2146 chain_msg = ' (chain %s)' % comment['commentId']
2147 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2148 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2149 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2150 reply_comment = reply_meta['commentRenderer']
2151 video_comments.append({
2152 'id': reply_comment['commentId'],
2153 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2154 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2155 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2156 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2157 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2158 'parent': comment['commentId']
2159 })
2160 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2161 continue
8d0ea5f9 2162 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2163
885d36d4 2164 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2165 if 'continuations' in item_section:
8d0ea5f9 2166 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2167 time.sleep(1)
2168
885d36d4 2169 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
277d6ff5 2170 return {
545cc85d 2171 'comments': video_comments,
2172 'comment_count': expected_video_comment_count
277d6ff5 2173 }
2174
2175 if get_comments:
2176 info['__post_extractor'] = extract_comments
4ea3be0a 2177
545cc85d 2178 self.mark_watched(video_id, player_response)
d77ab8e2 2179
545cc85d 2180 return info
c5e8d7af 2181
5f6a1245 2182
8bdd16b4 2183class YoutubeTabIE(YoutubeBaseInfoExtractor):
2184 IE_DESC = 'YouTube.com tab'
70d5c17b 2185 _VALID_URL = r'''(?x)
2186 https?://
2187 (?:\w+\.)?
2188 (?:
2189 youtube(?:kids)?\.com|
2190 invidio\.us
2191 )/
2192 (?:
2193 (?:channel|c|user)/|
2194 (?P<not_channel>
9ba5705a 2195 feed/|hashtag/|
70d5c17b 2196 (?:playlist|watch)\?.*?\blist=
2197 )|
29f7c58a 2198 (?!(?:%s)\b) # Direct URLs
70d5c17b 2199 )
2200 (?P<id>[^/?\#&]+)
2201 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2202 IE_NAME = 'youtube:tab'
2203
81127aa5 2204 _TESTS = [{
8bdd16b4 2205 # playlists, multipage
2206 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2207 'playlist_mincount': 94,
2208 'info_dict': {
2209 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2210 'title': 'Игорь Клейнер - Playlists',
2211 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2212 'uploader': 'Игорь Клейнер',
2213 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2214 },
2215 }, {
2216 # playlists, multipage, different order
2217 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2218 'playlist_mincount': 94,
2219 'info_dict': {
2220 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2221 'title': 'Игорь Клейнер - Playlists',
2222 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2223 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2224 'uploader': 'Игорь Клейнер',
8bdd16b4 2225 },
2226 }, {
2227 # playlists, singlepage
2228 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2229 'playlist_mincount': 4,
2230 'info_dict': {
2231 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2232 'title': 'ThirstForScience - Playlists',
2233 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2234 'uploader': 'ThirstForScience',
2235 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2236 }
2237 }, {
2238 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2239 'only_matching': True,
2240 }, {
2241 # basic, single video playlist
0e30a7b9 2242 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2243 'info_dict': {
0e30a7b9 2244 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2245 'uploader': 'Sergey M.',
2246 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2247 'title': 'youtube-dl public playlist',
81127aa5 2248 },
0e30a7b9 2249 'playlist_count': 1,
9291475f 2250 }, {
8bdd16b4 2251 # empty playlist
0e30a7b9 2252 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2253 'info_dict': {
0e30a7b9 2254 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2255 'uploader': 'Sergey M.',
2256 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2257 'title': 'youtube-dl empty playlist',
9291475f
PH
2258 },
2259 'playlist_count': 0,
2260 }, {
8bdd16b4 2261 # Home tab
2262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2263 'info_dict': {
8bdd16b4 2264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2265 'title': 'lex will - Home',
2266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2267 'uploader': 'lex will',
2268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2269 },
8bdd16b4 2270 'playlist_mincount': 2,
9291475f 2271 }, {
8bdd16b4 2272 # Videos tab
2273 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2274 'info_dict': {
8bdd16b4 2275 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2276 'title': 'lex will - Videos',
2277 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2278 'uploader': 'lex will',
2279 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2280 },
8bdd16b4 2281 'playlist_mincount': 975,
9291475f 2282 }, {
8bdd16b4 2283 # Videos tab, sorted by popular
2284 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2285 'info_dict': {
8bdd16b4 2286 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2287 'title': 'lex will - Videos',
2288 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2289 'uploader': 'lex will',
2290 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2291 },
8bdd16b4 2292 'playlist_mincount': 199,
9291475f 2293 }, {
8bdd16b4 2294 # Playlists tab
2295 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2296 'info_dict': {
8bdd16b4 2297 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2298 'title': 'lex will - Playlists',
2299 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2300 'uploader': 'lex will',
2301 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2302 },
8bdd16b4 2303 'playlist_mincount': 17,
ac7553d0 2304 }, {
8bdd16b4 2305 # Community tab
2306 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2307 'info_dict': {
8bdd16b4 2308 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2309 'title': 'lex will - Community',
2310 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2311 'uploader': 'lex will',
2312 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2313 },
2314 'playlist_mincount': 18,
87dadd45 2315 }, {
8bdd16b4 2316 # Channels tab
2317 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2318 'info_dict': {
8bdd16b4 2319 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2320 'title': 'lex will - Channels',
2321 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2322 'uploader': 'lex will',
2323 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2324 },
deaec5af 2325 'playlist_mincount': 12,
6b08cdf6 2326 }, {
a0566bbf 2327 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2328 'only_matching': True,
2329 }, {
a0566bbf 2330 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2331 'only_matching': True,
2332 }, {
a0566bbf 2333 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2334 'only_matching': True,
2335 }, {
2336 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2337 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2338 'info_dict': {
2339 'title': '29C3: Not my department',
2340 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2341 'uploader': 'Christiaan008',
2342 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2343 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2344 },
2345 'playlist_count': 96,
2346 }, {
2347 'note': 'Large playlist',
2348 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2349 'info_dict': {
8bdd16b4 2350 'title': 'Uploads from Cauchemar',
2351 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2352 'uploader': 'Cauchemar',
2353 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2354 },
8bdd16b4 2355 'playlist_mincount': 1123,
2356 }, {
2357 # even larger playlist, 8832 videos
2358 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2359 'only_matching': True,
4b7df0d3
JMF
2360 }, {
2361 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2362 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2363 'info_dict': {
acf757f4
PH
2364 'title': 'Uploads from Interstellar Movie',
2365 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2366 'uploader': 'Interstellar Movie',
8bdd16b4 2367 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2368 },
481cc733 2369 'playlist_mincount': 21,
8bdd16b4 2370 }, {
2371 # https://github.com/ytdl-org/youtube-dl/issues/21844
2372 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2373 'info_dict': {
2374 'title': 'Data Analysis with Dr Mike Pound',
2375 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2376 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2377 'uploader': 'Computerphile',
deaec5af 2378 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2379 },
2380 'playlist_mincount': 11,
2381 }, {
a0566bbf 2382 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2383 'only_matching': True,
dacb3a86
S
2384 }, {
2385 # Playlist URL that does not actually serve a playlist
2386 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2387 'info_dict': {
2388 'id': 'FqZTN594JQw',
2389 'ext': 'webm',
2390 'title': "Smiley's People 01 detective, Adventure Series, Action",
2391 'uploader': 'STREEM',
2392 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2394 'upload_date': '20150526',
2395 'license': 'Standard YouTube License',
2396 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2397 'categories': ['People & Blogs'],
2398 'tags': list,
dbdaaa23 2399 'view_count': int,
dacb3a86
S
2400 'like_count': int,
2401 'dislike_count': int,
2402 },
2403 'params': {
2404 'skip_download': True,
2405 },
13a75688 2406 'skip': 'This video is not available.',
dacb3a86 2407 'add_ie': [YoutubeIE.ie_key()],
481cc733 2408 }, {
8bdd16b4 2409 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2410 'only_matching': True,
66b48727 2411 }, {
8bdd16b4 2412 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2413 'only_matching': True,
a0566bbf 2414 }, {
2415 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2416 'info_dict': {
2417 'id': '9Auq9mYxFEE',
2418 'ext': 'mp4',
deaec5af 2419 'title': compat_str,
a0566bbf 2420 'uploader': 'Sky News',
2421 'uploader_id': 'skynews',
2422 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2423 'upload_date': '20191102',
deaec5af 2424 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2425 'categories': ['News & Politics'],
2426 'tags': list,
2427 'like_count': int,
2428 'dislike_count': int,
2429 },
2430 'params': {
2431 'skip_download': True,
2432 },
2433 }, {
2434 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2435 'info_dict': {
2436 'id': 'a48o2S1cPoo',
2437 'ext': 'mp4',
2438 'title': 'The Young Turks - Live Main Show',
2439 'uploader': 'The Young Turks',
2440 'uploader_id': 'TheYoungTurks',
2441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2442 'upload_date': '20150715',
2443 'license': 'Standard YouTube License',
2444 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2445 'categories': ['News & Politics'],
2446 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2447 'like_count': int,
2448 'dislike_count': int,
2449 },
2450 'params': {
2451 'skip_download': True,
2452 },
2453 'only_matching': True,
2454 }, {
2455 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2456 'only_matching': True,
2457 }, {
2458 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2459 'only_matching': True,
3d3dddc9 2460 }, {
2461 'url': 'https://www.youtube.com/feed/trending',
2462 'only_matching': True,
2463 }, {
2464 # needs auth
2465 'url': 'https://www.youtube.com/feed/library',
2466 'only_matching': True,
2467 }, {
2468 # needs auth
2469 'url': 'https://www.youtube.com/feed/history',
2470 'only_matching': True,
2471 }, {
2472 # needs auth
2473 'url': 'https://www.youtube.com/feed/subscriptions',
2474 'only_matching': True,
2475 }, {
2476 # needs auth
2477 'url': 'https://www.youtube.com/feed/watch_later',
2478 'only_matching': True,
2479 }, {
2480 # no longer available?
2481 'url': 'https://www.youtube.com/feed/recommended',
2482 'only_matching': True,
29f7c58a 2483 }, {
2484 # inline playlist with not always working continuations
2485 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2486 'only_matching': True,
2487 }, {
2488 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2489 'only_matching': True,
2490 }, {
2491 'url': 'https://www.youtube.com/course',
2492 'only_matching': True,
2493 }, {
2494 'url': 'https://www.youtube.com/zsecurity',
2495 'only_matching': True,
2496 }, {
2497 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2498 'only_matching': True,
2499 }, {
2500 'url': 'https://www.youtube.com/TheYoungTurks/live',
2501 'only_matching': True,
2502 }]
2503
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubeIE are always rejected here; everything else
    is decided by the parent class' suitable().
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2508
2509 def _extract_channel_id(self, webpage):
2510 channel_id = self._html_search_meta(
2511 'channelId', webpage, 'channel id', default=None)
2512 if channel_id:
2513 return channel_id
2514 channel_url = self._html_search_meta(
2515 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2516 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2517 'twitter:app:url:googleplay'), webpage, 'channel url')
2518 return self._search_regex(
2519 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2520 channel_url, 'channel id')
15f6397c 2521
8bdd16b4 2522 @staticmethod
cd7c66cf 2523 def _extract_basic_item_renderer(item):
2524 # Modified from _extract_grid_item_renderer
2525 known_renderers = (
2526 'playlistRenderer', 'videoRenderer', 'channelRenderer'
2527 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2528 )
2529 for key, renderer in item.items():
2530 if key not in known_renderers:
2531 continue
2532 return renderer
8bdd16b4 2533
def _grid_entries(self, grid_renderer):
    """Yield a result for every supported item in ``grid_renderer['items']``.

    Playlists and channels become url results pointing back at this
    extractor; videos are passed through _extract_video().  Items that are
    not dicts, or that hold no known renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # The three checks are independent of each other on purpose:
        # each ID that is present produces its own entry.
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=runs_title)

        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        channel_id = item_renderer.get('channelId')
        if channel_id:
            # Channels keep their title under simpleText rather than runs
            simple_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2562
3d3dddc9 2563 def _shelf_entries_from_content(self, shelf_renderer):
2564 content = shelf_renderer.get('content')
2565 if not isinstance(content, dict):
8bdd16b4 2566 return
cd7c66cf 2567 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2568 if renderer:
2569 # TODO: add support for nested playlists so each shelf is processed
2570 # as separate playlist
2571 # TODO: this includes only first N items
2572 for entry in self._grid_entries(renderer):
2573 yield entry
2574 renderer = content.get('horizontalListRenderer')
2575 if renderer:
2576 # TODO
2577 pass
8bdd16b4 2578
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf exposes an endpoint URL, a url result for it is yielded
    first; the entries embedded in the shelf content are yielded afterwards
    in every case.  With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url,
            video_title=try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2596
8bdd16b4 2597 def _playlist_entries(self, video_list_renderer):
2598 for content in video_list_renderer['contents']:
2599 if not isinstance(content, dict):
2600 continue
2601 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2602 if not isinstance(renderer, dict):
2603 continue
2604 video_id = renderer.get('videoId')
2605 if not video_id:
2606 continue
2607 yield self._extract_video(renderer)
07aeced6 2608
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2616
8bdd16b4 2617 def _video_entry(self, video_renderer):
2618 video_id = video_renderer.get('videoId')
2619 if video_id:
2620 return self._extract_video(video_renderer)
dacb3a86 2621
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by one community post.

    The attached video (if any) comes first, followed by a url result for
    every YouTube video link found in the post text.  A text link that
    points at the attached video is not yielded a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            # Remember the attachment so the same video linked in the
            # text below can be recognised as a duplicate.
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the ID of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2650
8bdd16b4 2651 def _post_thread_continuation_entries(self, post_thread_continuation):
2652 contents = post_thread_continuation.get('contents')
2653 if not isinstance(contents, list):
2654 return
2655 for content in contents:
2656 renderer = content.get('backstagePostThreadRenderer')
2657 if not isinstance(renderer, dict):
2658 continue
2659 for entry in self._post_thread_entries(renderer):
2660 yield entry
07aeced6 2661
29f7c58a 2662 @staticmethod
2663 def _build_continuation_query(continuation, ctp=None):
2664 query = {
2665 'ctoken': continuation,
2666 'continuation': continuation,
2667 }
2668 if ctp:
2669 query['itct'] = ctp
2670 return query
2671
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    Looks at ``renderer['continuations'][0]['nextContinuationData']`` and
    returns None when that dict or its ``continuation`` token is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2683
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first.  Otherwise the first
    ``continuationItemRenderer`` with a token among the renderer's
    ``contents`` and ``items`` lists is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = []
    for list_key in ('contents', 'items'):
        candidates.extend(try_get(renderer, lambda x: x[list_key], list) or [])

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        token = None
        if endpoint:
            token = try_get(
                endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 2706
def _entries(self, tab, identity_token, item_id):
    """Yield all entries of *tab*, following continuations page by page.

    tab            -- the selected tabRenderer dict
    identity_token -- sent as x-youtube-identity-token header when truthy
    item_id        -- only used to label the API requests

    The entries present in the tab itself are yielded first.  After that
    the "browse" API is called for as long as a continuation is found in
    the previous response.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below parent_renderer and, as a side effect,
        # stores the continuation it finds in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries above, because
    # Python 2 does not support nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break

        # 'itct' is only present when the continuation came with click
        # tracking params, so it must not be indexed unconditionally.
        api_query = {'continuation': continuation['continuation']}
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            api_query['clickTracking'] = {'clickTrackingParams': click_tracking_params}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=api_query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if response.get('continuationContents') or response.get('onResponseReceivedActions'):
                    break
                last_error = 'Incomplete data recieved'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # First response layout: continuationContents
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset before extract_entries may fill it in again
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: onResponseReceivedActions.  The type of
        # the first item decides which handler gets the whole item list,
        # wrapped under the key that handler expects.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 2850
@staticmethod
def _extract_selected_tab(tabs):
    """Return the tabRenderer of the first tab flagged as selected.

    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        if try_get(candidate, lambda x: x['tabRenderer']['selected'], bool):
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2858
@staticmethod
def _extract_uploader(data):
    """Return uploader metadata taken from the playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is None are left out.  An empty
    dict is returned when the sidebar holds no video owner.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 2881
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a tab-based page.

    Metadata is read from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; the entries come from
    the selected tab.  *webpage* is not used by this method.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    # No channel metadata: fall back to the playlist metadata renderer
    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    # From here on `renderer` is whichever of the two was found (if any)
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlists, where no channel_id was set above
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    # Append the selected tab's title using the ' - %s' template
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    # Without channel metadata, take the uploader from the playlist sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror the uploader* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(selected_tab, identity_token, playlist_id),
        **metadata)
73c4ac2c 2948
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a mix playlist, one watch page at a time.

    After the videos of *playlist* have been yielded, the watch page of
    the last video is downloaded and its playlist panel becomes the next
    *playlist*.  Iteration stops when a page provides no playlist panel
    or the panel contains no videos.
    """
    page_num = 0
    while True:
        # The follow-up page may lack the playlist panel altogether;
        # stop instead of failing inside _playlist_entries
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        video_count = len(videos)
        # NOTE(review): presumably skips entries that overlap with the
        # previous page -- confirm against the current mix layout
        start = min(video_count - 24, 26) if video_count > 25 else 0
        for item in videos[start:]:
            yield item

        page_num += 1
        _, data = self._extract_webpage(
            'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, videos[-1]['id']),
            '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
2966
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Handle a playlist panel found on a watch page.

    When the panel's endpoint resolves to a URL different from *url*, the
    work is handed over to that URL; otherwise the panel is extracted as a
    mix playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        # Only mix playlists are extracted here
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 2984
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs found in ``data['alerts']``.

    An alert may produce several pairs: one for ``text.simpleText`` and
    one for each entry of ``text.runs`` that has a text.  Alerts without a
    ``type`` and malformed (non-dict) elements are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer payload itself may be malformed as well
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
3002
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN for *webpage*, or None when there is none.

    The parsed ytcfg is consulted first; a regex search over the raw
    webpage serves as fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3012
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page.  The download is
    repeated, up to the ``extractor_retries`` param (default 3), while the
    data has neither ``contents`` nor ``currentVideoEndpoint``.

    Raises ExtractorError when the page carries an alert of type "error".
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0
    count = -1
    last_error = 'Incomplete yt initial data recieved'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                # Only the last error is raised below; any earlier one
                # is downgraded to a warning here
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        # Data is considered complete once one of these keys is populated
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Out of retries: report the error; the incomplete data is
        # still returned below if report_error() does not raise
        if count >= retries:
            self._downloader.report_error(last_error)
    return webpage, data
3042
def _real_extract(self, url):
    """Extract a channel tab, playlist or single video from *url*.

    The URL is normalised to www.youtube.com first.  The downloaded page
    is then handled, in this order, as a tab page, as a watch page with a
    playlist panel, or as a single video delegated to YoutubeIE.

    Raises ExtractorError when none of these can be recognised.
    """
    item_id = self._match_id(url)
    # Force the www.youtube.com host whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    mobj = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    mobj = mobj.groupdict() if mobj else {}
    if mobj and not mobj.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        # Insert "/videos" between the matched URL and its trailing part
        url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and (mobj.get('not_channel') or '').startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        # --no-playlist: hand the single video over to YoutubeIE
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # Tab-based page
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        identity_token = self._extract_identity_token(webpage, item_id)
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)

    # Watch page with a playlist panel
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Last resort: a single video; the ID from the page data wins over
    # the one from the URL
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3097
c5e8d7af 3098
# Accepts bare playlist IDs as well as youtube/youtubekids/invidio.us URLs
# carrying a "list=" parameter, and redirects them to the canonical
# /playlist URL handled by YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # The whole URL part is optional, so that a bare playlist ID matches too
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject every URL YoutubeTabIE accepts; otherwise defer to the parent class."""
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist URL, carrying over the original query."""
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        # No query string to carry over (e.g. a bare playlist ID was given)
        if not qs:
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3173
3174
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` parameter.

    The link is rewritten to a full ``/watch`` URL and delegated to
    ``YoutubeTabIE``.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a watch URL with video and playlist IDs."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist ID, not the video ID.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3213
3214
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to a ``/user/<name>`` URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3228
b05654f0 3229
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` shorthand to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=LL`` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3247
3248
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the 'params' field of the search request;
    # subclasses override it to change the search.
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for *query*, page by page, stopping after *n*.

        Iteration also ends when a page request returns nothing, when a page
        has no recognizable section list, or when no continuation token is
        found for the next page.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        # Where the section list lives on the first page ...
        def initial_sections(x):
            return x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']

        # ... and where it lives on continuation pages.
        def continued_sections(x):
            return x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']

        def token_of(x):
            return x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']

        def items_of(x):
            return x['itemSectionRenderer']['contents']

        yielded = 0
        for page_num in itertools.count(1):
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=request)
            if not response:
                break
            sections = try_get(
                response, (initial_sections, continued_sections), list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(section, token_of, compat_str)

                for item in try_get(section, items_of, list) or ():
                    renderer = (
                        item.get('videoRenderer')
                        if isinstance(item, dict) else None)
                    # Skip anything that is not a video renderer with an ID.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                break
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3317
c9ae7b95 3318
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request by the parent class.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3324
c9ae7b95 3325
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a ``youtube.com/results?...`` URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and ``sp`` value from the URL, then search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The URL's 'sp' value (empty string when absent) replaces the
        # search params on this instance.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3351
3352
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    Each subclass has to provide a _FEED_NAME attribute; it is used both
    for the extractor name and for the ``/feed/<name>`` URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_initialize(self):
        """Log in during initialization."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed page URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3372
3373
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shorthand to the ``WL`` playlist URL."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3386
3387
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com home URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the site root (optionally followed by '?' or '#') as well as
    # the ':ytrec' / ':ytrecommended' keywords.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3402
1ed5b5c9 3403
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` family of keywords."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3415
1ed5b5c9 3416
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keywords."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3425
3426
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video ID and raise a hint.

    Extraction always fails with an expected error suggesting that the URL
    be quoted.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # A watch URL with at most one of the listed non-video parameters, or an
    # attribution link with only its 'a' parameter, anchored at the end.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3474
3475
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v=`` value is 1 to 10 characters long.

    Extraction always fails with an expected error naming the short ID.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError about the incomplete ID."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)