]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Reduce default of `--extractor-retries` to 3
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
d77ab8e2 8import random
c5e8d7af 9import re
8a784c74 10import time
e0df6211 11import traceback
c5e8d7af 12
b05654f0 13from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 14from ..compat import (
edf3e38e 15 compat_chr,
29f7c58a 16 compat_HTTPError,
8d81f3e3 17 compat_kwargs,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c5e8d7af 27 clean_html,
c5e8d7af 28 ExtractorError,
b60419c5 29 format_field,
2d30521a 30 float_or_none,
dd27fd17 31 int_or_none,
94278f72 32 mimetype2ext,
6310acf5 33 parse_codecs,
7c80519c 34 parse_duration,
dca3ff4a 35 qualities,
3995d37d 36 remove_start,
cf7e015f 37 smuggle_url,
dbdaaa23 38 str_or_none,
c93d53f5 39 str_to_int,
556dbe7f 40 try_get,
c5e8d7af
PH
41 unescapeHTML,
42 unified_strdate,
cf7e015f 43 unsmuggle_url,
8bdd16b4 44 update_url_query,
21c340b8 45 url_or_none,
6e6bc8da 46 urlencode_postdata,
8bdd16b4 47 urljoin,
c5e8d7af
PH
48)
49
5f6a1245 50
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Alternation of path components (regex fragment)
    # NOTE(review): presumably these are first path segments that must not be
    # mistaken for a channel/user name - confirm against the users of this constant
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result entry per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """Submit f_req (JSON-encoded) together with the login form fields to url.

            Returns the parsed JSON response; the request is non-fatal, and the
            callers below treat a False result as a failed request.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        def failure_reason(code, incorrect_answer_text):
            """Map the INCORRECT_ANSWER_ENTERED code to a readable text, pass others through."""
            if code == 'INCORRECT_ANSWER_ENTERED':
                return incorrect_answer_text
            return code

        # The same ServiceLogin URL is embedded in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be evaluated before the %-formatting,
            # otherwise the 'Unable to login' prefix is lost for other errors
            warn('Unable to login: %s' % failure_reason(login_msg, 'Invalid password'))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn('Unable to finish TFA: %s' % failure_reason(tfa_msg, 'Invalid TFA code'))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to InfoExtractor._download_webpage_handle with a private copy of 'query'.

        The 'query' keyword argument is copied so that the dict handed to the
        parent implementation is never the caller's own object. A missing or
        None 'query' is treated as an empty dict.
        """
        query = (kwargs.get('query') or {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Attempt to log in, unless no downloader is attached."""
        if self._downloader is None:
            return
        # The return value is not used here
        self._login()

    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id, fatal=True):
        """Request the youtubei/v1 endpoint ep and return the parsed JSON response.

        The request payload is _DEFAULT_API_DATA with the top-level keys of
        query merged over it, serialized as JSON.
        """
        # Shallow copy: the nested 'context' dict is shared with the class
        # attribute and must not be mutated in place
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'), fatal=fatal,
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in webpage and return it parsed as JSON.

        Two patterns are supplied to the search: the data regex followed by a
        boundary marker, and the bare data regex.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Return the argument of the first ytcfg.set({...}) call in webpage, parsed as JSON.

        Falls back to parsing '{}' when no such call is found; parsing is non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict for YoutubeIE from a video renderer dict.

        Fields read from renderer: videoId, title, descriptionSnippet,
        lengthText, viewCountText and ownerText/shortBylineText.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is removed first; only a leading run of digits/commas counts
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
341
0c148415 342
360e1ca5 343class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 344 IE_DESC = 'YouTube.com'
bc2ca1bb 345 _INVIDIOUS_SITES = (
346 # invidious-redirect websites
347 r'(?:www\.)?redirect\.invidious\.io',
348 r'(?:(?:www|dev)\.)?invidio\.us',
349 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
350 r'(?:www\.)?invidious\.pussthecat\.org',
351 r'(?:www\.)?invidious\.048596\.xyz',
352 r'(?:www\.)?invidious\.zee\.li',
353 r'(?:www\.)?vid\.puffyan\.us',
354 r'(?:(?:www|au)\.)?ytprivate\.com',
355 r'(?:www\.)?invidious\.namazso\.eu',
356 r'(?:www\.)?invidious\.ethibox\.fr',
357 r'(?:www\.)?inv\.skyn3t\.in',
358 r'(?:www\.)?invidious\.himiko\.cloud',
359 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
360 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
361 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
362 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
363 # youtube-dl invidious instances list
364 r'(?:(?:www|no)\.)?invidiou\.sh',
365 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
366 r'(?:www\.)?invidious\.kabi\.tk',
367 r'(?:www\.)?invidious\.13ad\.de',
368 r'(?:www\.)?invidious\.mastodon\.host',
369 r'(?:www\.)?invidious\.zapashcanon\.fr',
370 r'(?:www\.)?invidious\.kavin\.rocks',
371 r'(?:www\.)?invidious\.tube',
372 r'(?:www\.)?invidiou\.site',
373 r'(?:www\.)?invidious\.site',
374 r'(?:www\.)?invidious\.xyz',
375 r'(?:www\.)?invidious\.nixnet\.xyz',
376 r'(?:www\.)?invidious\.drycat\.fr',
377 r'(?:www\.)?tube\.poal\.co',
378 r'(?:www\.)?tube\.connect\.cafe',
379 r'(?:www\.)?vid\.wxzm\.sx',
380 r'(?:www\.)?vid\.mint\.lgbt',
381 r'(?:www\.)?yewtu\.be',
382 r'(?:www\.)?yt\.elukerio\.org',
383 r'(?:www\.)?yt\.lelux\.fi',
384 r'(?:www\.)?invidious\.ggc-project\.de',
385 r'(?:www\.)?yt\.maisputain\.ovh',
386 r'(?:www\.)?invidious\.toot\.koeln',
387 r'(?:www\.)?invidious\.fdn\.fr',
388 r'(?:www\.)?watch\.nettohikari\.com',
389 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
390 r'(?:www\.)?qklhadlycap4cnod\.onion',
391 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
392 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
393 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
394 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
395 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
396 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
397 )
cb7dfeea 398 _VALID_URL = r"""(?x)^
c5e8d7af 399 (
edb53e2d 400 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 401 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
402 (?:www\.)?deturl\.com/www\.youtube\.com|
403 (?:www\.)?pwnyoutube\.com|
404 (?:www\.)?hooktube\.com|
405 (?:www\.)?yourepeat\.com|
406 tube\.majestyc\.net|
407 %(invidious)s|
408 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
409 (?:.*?\#/)? # handle anchor (#/) redirect urls
410 (?: # the various things that can precede the ID:
ac7553d0 411 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 412 |(?: # or the v= param in all its forms
f7000f3a 413 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 414 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 415 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
416 v=
417 )
f4b05232 418 ))
cbaed4bb
S
419 |(?:
420 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
421 vid\.plus| # or vid.plus/xxxx
422 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 423 %(invidious)s
cbaed4bb 424 )/
edb53e2d 425 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 426 )
c5e8d7af 427 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 428 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
429 (?!.*?\blist=
430 (?:
431 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
432 WL # WL are handled by the watch later IE
433 )
434 )
c5e8d7af 435 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 436 $""" % {
437 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
438 'invidious': '|'.join(_INVIDIOUS_SITES),
439 }
e40c758c 440 _PLAYER_INFO_RE = (
cc2db878 441 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
442 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 443 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 444 )
2c62dc26 445 _formats = {
c2d3cb4c 446 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
447 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
448 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
449 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
450 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
451 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
452 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
453 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 454 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 455 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
456 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
457 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
458 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
459 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
460 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 461 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 462 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
463 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 464
465
466 # 3D videos
c2d3cb4c 467 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
468 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
469 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
470 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 471 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
472 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
473 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 474
96fb5605 475 # Apple HTTP Live Streaming
11f12195 476 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 477 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
478 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
479 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
480 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
481 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 482 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
483 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
484
485 # DASH mp4 video
d23028a8
S
486 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
488 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 491 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
492 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
496 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
497 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 498
f6f1fc92 499 # Dash mp4 audio
d23028a8
S
500 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
501 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
502 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
503 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
504 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
505 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
506 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
507
508 # Dash webm
d23028a8
S
509 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
510 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
516 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
517 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 524 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
525 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
527 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
528 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
530 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
531
532 # Dash webm audio
d23028a8
S
533 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
534 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 535
0857baad 536 # Dash webm audio with opus inside
d23028a8
S
537 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
538 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
539 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 540
ce6b9a2d
PH
541 # RTMP (unnamed)
542 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
543
544 # av01 video only formats sometimes served with "unknown" codecs
545 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
546 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
547 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 549 }
29f7c58a 550 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 551
fd5c4aab
S
552 _GEO_BYPASS = False
553
78caa52a 554 IE_NAME = 'youtube'
2eb88d95
PH
555 _TESTS = [
556 {
2d3d2997 557 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
558 'info_dict': {
559 'id': 'BaW_jenozKc',
560 'ext': 'mp4',
3867038a 561 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
562 'uploader': 'Philipp Hagemeister',
563 'uploader_id': 'phihag',
ec85ded8 564 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
565 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
566 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 567 'upload_date': '20121002',
3867038a 568 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 569 'categories': ['Science & Technology'],
3867038a 570 'tags': ['youtube-dl'],
556dbe7f 571 'duration': 10,
dbdaaa23 572 'view_count': int,
3e7c1224
PH
573 'like_count': int,
574 'dislike_count': int,
7c80519c 575 'start_time': 1,
297a564b 576 'end_time': 9,
2eb88d95 577 }
0e853ca4 578 },
fccd3771 579 {
4bc3a23e
PH
580 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
581 'note': 'Embed-only video (#1746)',
582 'info_dict': {
583 'id': 'yZIXLfi8CZQ',
584 'ext': 'mp4',
585 'upload_date': '20120608',
586 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
587 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
588 'uploader': 'SET India',
94bfcd23 589 'uploader_id': 'setindia',
ec85ded8 590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 591 'age_limit': 18,
545cc85d 592 },
593 'skip': 'Private video',
fccd3771 594 },
11b56058 595 {
8bdd16b4 596 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
597 'note': 'Use the first video ID in the URL',
598 'info_dict': {
599 'id': 'BaW_jenozKc',
600 'ext': 'mp4',
3867038a 601 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
602 'uploader': 'Philipp Hagemeister',
603 'uploader_id': 'phihag',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 605 'upload_date': '20121002',
3867038a 606 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 607 'categories': ['Science & Technology'],
3867038a 608 'tags': ['youtube-dl'],
556dbe7f 609 'duration': 10,
dbdaaa23 610 'view_count': int,
11b56058
PM
611 'like_count': int,
612 'dislike_count': int,
34a7de29
S
613 },
614 'params': {
615 'skip_download': True,
616 },
11b56058 617 },
dd27fd17 618 {
2d3d2997 619 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
620 'note': '256k DASH audio (format 141) via DASH manifest',
621 'info_dict': {
622 'id': 'a9LDPn-MO4I',
623 'ext': 'm4a',
624 'upload_date': '20121002',
625 'uploader_id': '8KVIDEO',
ec85ded8 626 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
627 'description': '',
628 'uploader': '8KVIDEO',
629 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 630 },
4bc3a23e
PH
631 'params': {
632 'youtube_include_dash_manifest': True,
633 'format': '141',
4919603f 634 },
de3c7fe0 635 'skip': 'format 141 not served anymore',
dd27fd17 636 },
8bdd16b4 637 # DASH manifest with encrypted signature
638 {
639 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
640 'info_dict': {
641 'id': 'IB3lcPjvWLA',
642 'ext': 'm4a',
643 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
644 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
645 'duration': 244,
646 'uploader': 'AfrojackVEVO',
647 'uploader_id': 'AfrojackVEVO',
648 'upload_date': '20131011',
cc2db878 649 'abr': 129.495,
8bdd16b4 650 },
651 'params': {
652 'youtube_include_dash_manifest': True,
653 'format': '141/bestaudio[ext=m4a]',
654 },
655 },
aa79ac0c
PH
656 # Controversy video
657 {
658 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
659 'info_dict': {
660 'id': 'T4XJQO3qol8',
661 'ext': 'mp4',
556dbe7f 662 'duration': 219,
aa79ac0c 663 'upload_date': '20100909',
4fe54c12 664 'uploader': 'Amazing Atheist',
aa79ac0c 665 'uploader_id': 'TheAmazingAtheist',
ec85ded8 666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 667 'title': 'Burning Everyone\'s Koran',
545cc85d 668 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 669 }
c522adb1 670 },
dd2d55f1 671 # Normal age-gate video (embed allowed)
c522adb1 672 {
2d3d2997 673 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
674 'info_dict': {
675 'id': 'HtVdAasjOgU',
676 'ext': 'mp4',
677 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 678 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 679 'duration': 142,
c522adb1
JMF
680 'uploader': 'The Witcher',
681 'uploader_id': 'WitcherGame',
ec85ded8 682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 683 'upload_date': '20140605',
34952f09 684 'age_limit': 18,
c522adb1
JMF
685 },
686 },
8bdd16b4 687 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
688 # YouTube Red ad is not captured for creator
689 {
690 'url': '__2ABJjxzNo',
691 'info_dict': {
692 'id': '__2ABJjxzNo',
693 'ext': 'mp4',
694 'duration': 266,
695 'upload_date': '20100430',
696 'uploader_id': 'deadmau5',
697 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 698 'creator': 'deadmau5',
699 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 700 'uploader': 'deadmau5',
701 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 702 'alt_title': 'Some Chords',
8bdd16b4 703 },
704 'expected_warnings': [
705 'DASH manifest missing',
706 ]
707 },
067aa17e 708 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
709 {
710 'url': 'lqQg6PlCWgI',
711 'info_dict': {
712 'id': 'lqQg6PlCWgI',
713 'ext': 'mp4',
556dbe7f 714 'duration': 6085,
90227264 715 'upload_date': '20150827',
cbe2bd91 716 'uploader_id': 'olympic',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 718 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 719 'uploader': 'Olympic',
cbe2bd91
PH
720 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
721 },
722 'params': {
723 'skip_download': 'requires avconv',
e52a40ab 724 }
cbe2bd91 725 },
6271f1ca
PH
726 # Non-square pixels
727 {
728 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
729 'info_dict': {
730 'id': '_b-2C3KPAM0',
731 'ext': 'mp4',
732 'stretched_ratio': 16 / 9.,
556dbe7f 733 'duration': 85,
6271f1ca
PH
734 'upload_date': '20110310',
735 'uploader_id': 'AllenMeow',
ec85ded8 736 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 737 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 738 'uploader': '孫ᄋᄅ',
6271f1ca
PH
739 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
740 },
06b491eb
S
741 },
742 # url_encoded_fmt_stream_map is empty string
743 {
744 'url': 'qEJwOuvDf7I',
745 'info_dict': {
746 'id': 'qEJwOuvDf7I',
f57b7835 747 'ext': 'webm',
06b491eb
S
748 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
749 'description': '',
750 'upload_date': '20150404',
751 'uploader_id': 'spbelect',
752 'uploader': 'Наблюдатели Петербурга',
753 },
754 'params': {
755 'skip_download': 'requires avconv',
e323cf3f
S
756 },
757 'skip': 'This live event has ended.',
06b491eb 758 },
067aa17e 759 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
760 {
761 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
762 'info_dict': {
763 'id': 'FIl7x6_3R5Y',
eb6793ba 764 'ext': 'webm',
da77d856
S
765 'title': 'md5:7b81415841e02ecd4313668cde88737a',
766 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 767 'duration': 220,
da77d856
S
768 'upload_date': '20150625',
769 'uploader_id': 'dorappi2000',
ec85ded8 770 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 771 'uploader': 'dorappi2000',
eb6793ba 772 'formats': 'mincount:31',
da77d856 773 },
eb6793ba 774 'skip': 'not actual anymore',
2ee8f5d8 775 },
8a1a26ce
YCH
776 # DASH manifest with segment_list
777 {
778 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
779 'md5': '8ce563a1d667b599d21064e982ab9e31',
780 'info_dict': {
781 'id': 'CsmdDsKjzN8',
782 'ext': 'mp4',
17ee98e1 783 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
784 'uploader': 'Airtek',
785 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
786 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
787 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
788 },
789 'params': {
790 'youtube_include_dash_manifest': True,
791 'format': '135', # bestvideo
be49068d
S
792 },
793 'skip': 'This live event has ended.',
2ee8f5d8 794 },
cf7e015f
S
795 {
796 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 797 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 798 'info_dict': {
545cc85d 799 'id': 'jvGDaLqkpTg',
800 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
801 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
802 },
803 'playlist': [{
804 'info_dict': {
545cc85d 805 'id': 'jvGDaLqkpTg',
cf7e015f 806 'ext': 'mp4',
545cc85d 807 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
808 'description': 'md5:e03b909557865076822aa169218d6a5d',
809 'duration': 10643,
810 'upload_date': '20161111',
811 'uploader': 'Team PGP',
812 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
545cc85d 817 'id': '3AKt1R1aDnw',
cf7e015f 818 'ext': 'mp4',
545cc85d 819 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
820 'description': 'md5:e03b909557865076822aa169218d6a5d',
821 'duration': 10991,
822 'upload_date': '20161111',
823 'uploader': 'Team PGP',
824 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
826 },
827 }, {
828 'info_dict': {
545cc85d 829 'id': 'RtAMM00gpVc',
cf7e015f 830 'ext': 'mp4',
545cc85d 831 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
832 'description': 'md5:e03b909557865076822aa169218d6a5d',
833 'duration': 10995,
834 'upload_date': '20161111',
835 'uploader': 'Team PGP',
836 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
838 },
839 }, {
840 'info_dict': {
545cc85d 841 'id': '6N2fdlP3C5U',
cf7e015f 842 'ext': 'mp4',
545cc85d 843 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
844 'description': 'md5:e03b909557865076822aa169218d6a5d',
845 'duration': 10990,
846 'upload_date': '20161111',
847 'uploader': 'Team PGP',
848 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
849 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
850 },
851 }],
852 'params': {
853 'skip_download': True,
854 },
cbaed4bb 855 },
f9f49d87 856 {
067aa17e 857 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
858 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
859 'info_dict': {
860 'id': 'gVfLd0zydlo',
861 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
862 },
863 'playlist_count': 2,
be49068d 864 'skip': 'Not multifeed anymore',
f9f49d87 865 },
cbaed4bb 866 {
2d3d2997 867 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 868 'only_matching': True,
0e49d9a6 869 },
6d4fc66b 870 {
2d3d2997 871 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
872 'only_matching': True,
873 },
0e49d9a6 874 {
067aa17e 875 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 876 # Also tests cut-off URL expansion in video description (see
067aa17e
S
877 # https://github.com/ytdl-org/youtube-dl/issues/1892,
878 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
879 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
880 'info_dict': {
881 'id': 'lsguqyKfVQg',
882 'ext': 'mp4',
883 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 884 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 885 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 886 'duration': 133,
0e49d9a6
LL
887 'upload_date': '20151119',
888 'uploader_id': 'IronSoulElf',
ec85ded8 889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 890 'uploader': 'IronSoulElf',
eb6793ba
S
891 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
892 'track': 'Dark Walk - Position Music',
893 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 894 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
895 },
896 'params': {
897 'skip_download': True,
898 },
899 },
61f92af1 900 {
067aa17e 901 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
902 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
903 'only_matching': True,
904 },
313dfc45
LL
905 {
906 # Video with yt:stretch=17:0
907 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
908 'info_dict': {
909 'id': 'Q39EVAstoRM',
910 'ext': 'mp4',
911 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
912 'description': 'md5:ee18a25c350637c8faff806845bddee9',
913 'upload_date': '20151107',
914 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
915 'uploader': 'CH GAMER DROID',
916 },
917 'params': {
918 'skip_download': True,
919 },
be49068d 920 'skip': 'This video does not exist.',
313dfc45 921 },
7caf9830
S
922 {
923 # Video licensed under Creative Commons
924 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
925 'info_dict': {
926 'id': 'M4gD1WSo5mA',
927 'ext': 'mp4',
928 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
929 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 930 'duration': 721,
7caf9830
S
931 'upload_date': '20150127',
932 'uploader_id': 'BerkmanCenter',
ec85ded8 933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 934 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
935 'license': 'Creative Commons Attribution license (reuse allowed)',
936 },
937 'params': {
938 'skip_download': True,
939 },
940 },
fd050249
S
941 {
942 # Channel-like uploader_url
943 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
944 'info_dict': {
945 'id': 'eQcmzGIKrzg',
946 'ext': 'mp4',
947 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 948 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 949 'duration': 4060,
fd050249 950 'upload_date': '20151119',
eb6793ba 951 'uploader': 'Bernie Sanders',
fd050249 952 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
954 'license': 'Creative Commons Attribution license (reuse allowed)',
955 },
956 'params': {
957 'skip_download': True,
958 },
959 },
040ac686
S
960 {
961 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
962 'only_matching': True,
7f29cf54
S
963 },
964 {
067aa17e 965 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
966 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
967 'only_matching': True,
6496ccb4
S
968 },
969 {
970 # Rental video preview
971 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
972 'info_dict': {
973 'id': 'uGpuVWrhIzE',
974 'ext': 'mp4',
975 'title': 'Piku - Trailer',
976 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
977 'upload_date': '20150811',
978 'uploader': 'FlixMatrix',
979 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
981 'license': 'Standard YouTube License',
982 },
983 'params': {
984 'skip_download': True,
985 },
eb6793ba 986 'skip': 'This video is not available.',
022a5d66 987 },
12afdc2a
S
988 {
989 # YouTube Red video with episode data
990 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
991 'info_dict': {
992 'id': 'iqKdEhx-dD4',
993 'ext': 'mp4',
994 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 995 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 996 'duration': 2085,
12afdc2a
S
997 'upload_date': '20170118',
998 'uploader': 'Vsauce',
999 'uploader_id': 'Vsauce',
1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1001 'series': 'Mind Field',
1002 'season_number': 1,
1003 'episode_number': 1,
1004 },
1005 'params': {
1006 'skip_download': True,
1007 },
1008 'expected_warnings': [
1009 'Skipping DASH manifest',
1010 ],
1011 },
c7121fa7
S
1012 {
1013 # The following content has been identified by the YouTube community
1014 # as inappropriate or offensive to some audiences.
1015 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1016 'info_dict': {
1017 'id': '6SJNVb0GnPI',
1018 'ext': 'mp4',
1019 'title': 'Race Differences in Intelligence',
1020 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1021 'duration': 965,
1022 'upload_date': '20140124',
1023 'uploader': 'New Century Foundation',
1024 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1026 },
1027 'params': {
1028 'skip_download': True,
1029 },
545cc85d 1030 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1031 },
022a5d66
S
1032 {
1033 # itag 212
1034 'url': '1t24XAntNCY',
1035 'only_matching': True,
fd5c4aab
S
1036 },
1037 {
1038 # geo restricted to JP
1039 'url': 'sJL6WA-aGkQ',
1040 'only_matching': True,
1041 },
cd5a74a2
S
1042 {
1043 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1044 'only_matching': True,
1045 },
bc2ca1bb 1046 {
1047 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1048 'only_matching': True,
1049 },
1050 {
1051 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1052 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1053 'only_matching': True,
1054 },
825cd268
RA
1055 {
1056 # DRM protected
1057 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1058 'only_matching': True,
4fe54c12
S
1059 },
1060 {
1061 # Video with unsupported adaptive stream type formats
1062 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1063 'info_dict': {
1064 'id': 'Z4Vy8R84T1U',
1065 'ext': 'mp4',
1066 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1067 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1068 'duration': 433,
1069 'upload_date': '20130923',
1070 'uploader': 'Amelia Putri Harwita',
1071 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1073 'formats': 'maxcount:10',
1074 },
1075 'params': {
1076 'skip_download': True,
1077 'youtube_include_dash_manifest': False,
1078 },
5429d6a9 1079 'skip': 'not actual anymore',
5caabd3c 1080 },
1081 {
822b9d9c 1082 # Youtube Music Auto-generated description
5caabd3c 1083 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1084 'info_dict': {
1085 'id': 'MgNrAu2pzNs',
1086 'ext': 'mp4',
1087 'title': 'Voyeur Girl',
1088 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1089 'upload_date': '20190312',
5429d6a9
S
1090 'uploader': 'Stephen - Topic',
1091 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1092 'artist': 'Stephen',
1093 'track': 'Voyeur Girl',
1094 'album': 'it\'s too much love to know my dear',
1095 'release_date': '20190313',
1096 'release_year': 2019,
1097 },
1098 'params': {
1099 'skip_download': True,
1100 },
1101 },
66b48727
RA
1102 {
1103 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1104 'only_matching': True,
1105 },
011e75e6
S
1106 {
1107 # invalid -> valid video id redirection
1108 'url': 'DJztXj2GPfl',
1109 'info_dict': {
1110 'id': 'DJztXj2GPfk',
1111 'ext': 'mp4',
1112 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1113 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1114 'upload_date': '20090125',
1115 'uploader': 'Prochorowka',
1116 'uploader_id': 'Prochorowka',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1118 'artist': 'Panjabi MC',
1119 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1120 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
545cc85d 1125 'skip': 'Video unavailable',
ea74e00b
DP
1126 },
1127 {
1128 # empty description results in an empty string
1129 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1130 'info_dict': {
1131 'id': 'x41yOUIvK2k',
1132 'ext': 'mp4',
1133 'title': 'IMG 3456',
1134 'description': '',
1135 'upload_date': '20170613',
1136 'uploader_id': 'ElevageOrVert',
1137 'uploader': 'ElevageOrVert',
1138 },
1139 'params': {
1140 'skip_download': True,
1141 },
1142 },
a0566bbf 1143 {
29f7c58a 1144 # with '};' inside yt initial data (see [1])
1145 # see [2] for an example with '};' inside ytInitialPlayerResponse
1146 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1147 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1148 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1149 'info_dict': {
1150 'id': 'CHqg6qOn4no',
1151 'ext': 'mp4',
1152 'title': 'Part 77 Sort a list of simple types in c#',
1153 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1154 'upload_date': '20130831',
1155 'uploader_id': 'kudvenkat',
1156 'uploader': 'kudvenkat',
1157 },
1158 'params': {
1159 'skip_download': True,
1160 },
1161 },
29f7c58a 1162 {
1163 # another example of '};' in ytInitialData
1164 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1165 'only_matching': True,
1166 },
1167 {
1168 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1169 'only_matching': True,
1170 },
545cc85d 1171 {
cc2db878 1172 # https://github.com/ytdl-org/youtube-dl/pull/28094
1173 'url': 'OtqTfy26tG0',
1174 'info_dict': {
1175 'id': 'OtqTfy26tG0',
1176 'ext': 'mp4',
1177 'title': 'Burn Out',
1178 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1179 'upload_date': '20141120',
1180 'uploader': 'The Cinematic Orchestra - Topic',
1181 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1182 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1183 'artist': 'The Cinematic Orchestra',
1184 'track': 'Burn Out',
1185 'album': 'Every Day',
1186 'release_data': None,
1187 'release_year': None,
1188 },
1189 'params': {
1190 'skip_download': True,
1191 },
545cc85d 1192 },
bc2ca1bb 1193 {
1194 # controversial video, only works with bpctr when authenticated with cookies
1195 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1196 'only_matching': True,
1197 },
2eb88d95
PH
1198 ]
1199
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature layout id);
    # populated by _decrypt_signature
    self._player_cache = {}
    # Downloaded player JavaScript, keyed by player id;
    # populated by _extract_signature_function
    self._code_cache = {}
e0df6211 1204
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the layout id of a signature.

    The id is the length of every dot-separated part of example_sig,
    joined with dots again (e.g. 'abc.de' gives '3.2').
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1208
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in player_url.

    The patterns in cls._PLAYER_INFO_RE are tried in order and the 'id'
    group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (
        re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # Lazily stop at the first successful search
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c
S
1218
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures laid out like example_sig.

    A previously stored index list is loaded from the downloader cache when
    available. Otherwise the player JS is downloaded (once per player id),
    the signature function is parsed out of it, and the index list it
    produces for this signature length is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        def apply_cached_spec(s):
            # Output character k is input character cached_spec[k]
            return ''.join(s[i] for i in cached_spec)
        return apply_cached_spec

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a probe string whose n-th character has code
    # point n, so the output spells out the source index of each position
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1245
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is run on a probe string whose n-th character has code point n,
    which reveals the source index of every output position. Runs of
    indices that go up or down by one are collapsed into slice
    expressions, everything else becomes a single subscript. The
    resulting snippet is written with self.to_screen().

    func -- callable taking and returning a string
    example_sig -- a signature; only its length and the lengths of its
                   dot-separated parts are used
    """
    def gen_sig_code(idxs):
        """Yield 's[...]' expressions that together reproduce idxs."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A run that ends at index 0 going backwards needs an open end
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Empty permutation: there is nothing to emit
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i: with a single index the loop below never runs, but
        # the trailing yield still has to emit that index
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1284
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in jscode.

    The function name is located with the first matching pattern below
    (the 'sig' group), the function is extracted with JSInterpreter, and
    the returned callable passes its string argument to it as the single
    element of an argument list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
1308
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is extracted once per (player URL, signature
    layout) pair and kept in self._player_cache afterwards.

    Raises ExtractorError when player_url is None, or wrapping any
    exception raised while extracting or applying the function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn scheme-relative and site-relative player URLs into absolute ones
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Carry the full traceback in the message of the wrapping error
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1335
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in player_response.

    The URL is read from
    player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl'];
    nothing happens when it is missing or not a URL. The 'ver' and 'cpn'
    query parameters are set before the request is made. The request is
    non-fatal, so a failure does not raise.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # 16 uniformly chosen characters. The former
    # `random.randint(0, 256) & 63` was biased towards index 0 because
    # randint() includes its upper bound and 256 & 63 == 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1360
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in webpage, in order of discovery.

    Three kinds of embeds are collected: player URLs (embed/, v/ or p/
    paths on youtube.com or youtube-nocookie.com), values of lazyYT
    data-youtube-id attributes, and values of data-video_id attributes of
    the Wordpress "YouTube Video Importer" plugin. Player URLs and lazyYT
    values are HTML-unescaped. An empty list is returned when nothing is
    found.
    """
    # Embedded YouTube player
    # NB: the embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an
    # optional "(" followed by a literal ":", which never matched an actual
    # embedSWF("...") call; it now matches the opening parenthesis itself
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video id) tuple; only the id is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1392
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in webpage, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1397
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match the pattern.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1405
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Every chapter gets 'start_time' and 'end_time' in seconds and a
    'title'. A chapter ends where the next one starts; the last chapter
    ends at duration. Chapters whose start or end time cannot be
    determined are left out.

    Returns None when data has no non-empty chapter list.
    """
    def player_bar_chapters(x):
        overlay = x['playerOverlays']['playerOverlayRenderer']
        bar = overlay['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']
        return bar['playerBar']['chapteredPlayerBarRenderer']['chapters']

    chapters_list = try_get(data, player_bar_chapters, list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset is given in milliseconds; convert to seconds
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = chapter_time(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1445
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in webpage with regex and return it parsed.

    regex followed by self._YT_INITIAL_BOUNDARY_RE is tried first, then
    regex on its own. '{}' is parsed when neither matches, and parsing
    is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1450
c5e8d7af 1451 def _real_extract(self, url):
cf7e015f 1452 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1453 video_id = self._match_id(url)
1454 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1455 webpage_url = base_url + 'watch?v=' + video_id
1456 webpage = self._download_webpage(
1457 webpage_url + '&has_verified=1&bpctr=9999999999',
1458 video_id, fatal=False)
545cc85d 1459
1460 player_response = None
1461 if webpage:
1462 player_response = self._extract_yt_initial_variable(
1463 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1464 video_id, 'initial player response')
1465 if not player_response:
1466 player_response = self._call_api(
1467 'player', {'videoId': video_id}, video_id)
1468
1469 playability_status = player_response.get('playabilityStatus') or {}
1470 if playability_status.get('reason') == 'Sign in to confirm your age':
1471 pr = self._parse_json(try_get(compat_parse_qs(
1472 self._download_webpage(
1473 base_url + 'get_video_info', video_id,
1474 'Refetching age-gated info webpage',
1475 'unable to download video info webpage', query={
1476 'video_id': video_id,
7c60c33e 1477 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1478 }, fatal=False)),
1479 lambda x: x['player_response'][0],
1480 compat_str) or '{}', video_id)
1481 if pr:
1482 player_response = pr
1483
1484 trailer_video_id = try_get(
1485 playability_status,
1486 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1487 compat_str)
1488 if trailer_video_id:
1489 return self.url_result(
1490 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1491
545cc85d 1492 def get_text(x):
1493 if not x:
c2d125d9 1494 return
545cc85d 1495 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1496
545cc85d 1497 search_meta = (
1498 lambda x: self._html_search_meta(x, webpage, default=None)) \
1499 if webpage else lambda x: None
dbdaaa23 1500
545cc85d 1501 video_details = player_response.get('videoDetails') or {}
37357d21 1502 microformat = try_get(
545cc85d 1503 player_response,
1504 lambda x: x['microformat']['playerMicroformatRenderer'],
1505 dict) or {}
1506 video_title = video_details.get('title') \
1507 or get_text(microformat.get('title')) \
1508 or search_meta(['og:title', 'twitter:title', 'title'])
1509 video_description = video_details.get('shortDescription')
cf7e015f 1510
8fe10494 1511 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1512 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1513 multifeed_metadata_list = try_get(
1514 player_response,
1515 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1516 compat_str)
8fe10494
S
1517 if multifeed_metadata_list:
1518 entries = []
1519 feed_ids = []
1520 for feed in multifeed_metadata_list.split(','):
1521 # Unquote should take place before split on comma (,) since textual
1522 # fields may contain comma as well (see
067aa17e 1523 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1524 feed_data = compat_parse_qs(
1525 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1526
def feed_entry(name):
    """Return the first value stored under `name` in feed_data, or None.

    The lookup goes through try_get, so a missing key, an empty value list
    or a non-string value all yield None.
    """
    first_value = try_get(
        feed_data, lambda fields: fields[name][0], compat_str)
    return first_value
6b09401b
S
1530
1531 feed_id = feed_entry('id')
1532 if not feed_id:
1533 continue
1534 feed_title = feed_entry('title')
1535 title = video_title
1536 if feed_title:
1537 title += ' (%s)' % feed_title
8fe10494
S
1538 entries.append({
1539 '_type': 'url_transparent',
1540 'ie_key': 'Youtube',
1541 'url': smuggle_url(
545cc85d 1542 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1543 {'force_singlefeed': True}),
6b09401b 1544 'title': title,
8fe10494 1545 })
6b09401b 1546 feed_ids.append(feed_id)
8fe10494
S
1547 self.to_screen(
1548 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1549 % (', '.join(feed_ids), video_id))
545cc85d 1550 return self.playlist_result(
1551 entries, video_id, video_title, video_description)
8fe10494
S
1552 else:
1553 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1554
545cc85d 1555 formats = []
1556 itags = []
cc2db878 1557 itag_qualities = {}
545cc85d 1558 player_url = None
dca3ff4a 1559 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1560 streaming_data = player_response.get('streamingData') or {}
1561 streaming_formats = streaming_data.get('formats') or []
1562 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1563 for fmt in streaming_formats:
1564 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1565 continue
321bf820 1566
cc2db878 1567 itag = str_or_none(fmt.get('itag'))
1568 quality = fmt.get('quality')
1569 if itag and quality:
1570 itag_qualities[itag] = quality
1571 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1572 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1573 # number of fragments that would subsequently be requested (with `&sq=N`)
1574 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1575 continue
1576
545cc85d 1577 fmt_url = fmt.get('url')
1578 if not fmt_url:
1579 sc = compat_parse_qs(fmt.get('signatureCipher'))
1580 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1581 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1582 if not (sc and fmt_url and encrypted_sig):
1583 continue
1584 if not player_url:
1585 if not webpage:
1586 continue
1587 player_url = self._search_regex(
1588 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1589 webpage, 'player URL', fatal=False)
1590 if not player_url:
201e9eaa 1591 continue
545cc85d 1592 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1593 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1594 fmt_url += '&' + sp + '=' + signature
1595
545cc85d 1596 if itag:
1597 itags.append(itag)
cc2db878 1598 tbr = float_or_none(
1599 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1600 dct = {
1601 'asr': int_or_none(fmt.get('audioSampleRate')),
1602 'filesize': int_or_none(fmt.get('contentLength')),
1603 'format_id': itag,
1604 'format_note': fmt.get('qualityLabel') or quality,
1605 'fps': int_or_none(fmt.get('fps')),
1606 'height': int_or_none(fmt.get('height')),
dca3ff4a 1607 'quality': q(quality),
cc2db878 1608 'tbr': tbr,
545cc85d 1609 'url': fmt_url,
1610 'width': fmt.get('width'),
1611 }
1612 mimetype = fmt.get('mimeType')
1613 if mimetype:
1614 mobj = re.match(
1615 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1616 if mobj:
1617 dct['ext'] = mimetype2ext(mobj.group(1))
1618 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1619 no_audio = dct.get('acodec') == 'none'
1620 no_video = dct.get('vcodec') == 'none'
1621 if no_audio:
1622 dct['vbr'] = tbr
1623 if no_video:
1624 dct['abr'] = tbr
1625 if no_audio or no_video:
545cc85d 1626 dct['downloader_options'] = {
1627 # Youtube throttles chunks >~10M
1628 'http_chunk_size': 10485760,
bf1317d2 1629 }
7c60c33e 1630 if dct.get('ext'):
1631 dct['container'] = dct['ext'] + '_dash'
545cc85d 1632 formats.append(dct)
1633
1634 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1635 if hls_manifest_url:
1636 for f in self._extract_m3u8_formats(
1637 hls_manifest_url, video_id, 'mp4', fatal=False):
1638 itag = self._search_regex(
1639 r'/itag/(\d+)', f['url'], 'itag', default=None)
1640 if itag:
1641 f['format_id'] = itag
1642 formats.append(f)
1643
1644 if self._downloader.params.get('youtube_include_dash_manifest'):
1645 dash_manifest_url = streaming_data.get('dashManifestUrl')
1646 if dash_manifest_url:
545cc85d 1647 for f in self._extract_mpd_formats(
1648 dash_manifest_url, video_id, fatal=False):
cc2db878 1649 itag = f['format_id']
1650 if itag in itags:
1651 continue
dca3ff4a 1652 if itag in itag_qualities:
1653 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
1654 # but kept to maintain feature parity (and code similarity) with youtube-dl
1655 # Remove if this causes any issues with sorting in future
1656 f['quality'] = q(itag_qualities[itag])
545cc85d 1657 filesize = int_or_none(self._search_regex(
1658 r'/clen/(\d+)', f.get('fragment_base_url')
1659 or f['url'], 'file size', default=None))
1660 if filesize:
1661 f['filesize'] = filesize
cc2db878 1662 formats.append(f)
bf1317d2 1663
545cc85d 1664 if not formats:
63ad4d43 1665 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1666 raise ExtractorError(
1667 'This video is DRM protected.', expected=True)
1668 pemr = try_get(
1669 playability_status,
1670 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1671 dict) or {}
1672 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1673 subreason = pemr.get('subreason')
1674 if subreason:
1675 subreason = clean_html(get_text(subreason))
1676 if subreason == 'The uploader has not made this video available in your country.':
1677 countries = microformat.get('availableCountries')
1678 if not countries:
1679 regions_allowed = search_meta('regionsAllowed')
1680 countries = regions_allowed.split(',') if regions_allowed else None
1681 self.raise_geo_restricted(
1682 subreason, countries)
1683 reason += '\n' + subreason
1684 if reason:
1685 raise ExtractorError(reason, expected=True)
bf1317d2 1686
545cc85d 1687 self._sort_formats(formats)
bf1317d2 1688
545cc85d 1689 keywords = video_details.get('keywords') or []
1690 if not keywords and webpage:
1691 keywords = [
1692 unescapeHTML(m.group('content'))
1693 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1694 for keyword in keywords:
1695 if keyword.startswith('yt:stretch='):
1696 w, h = keyword.split('=')[1].split(':')
1697 w, h = int(w), int(h)
1698 if w > 0 and h > 0:
1699 ratio = w / h
1700 for f in formats:
1701 if f.get('vcodec') != 'none':
1702 f['stretched_ratio'] = ratio
6449cd80 1703
545cc85d 1704 thumbnails = []
1705 for container in (video_details, microformat):
1706 for thumbnail in (try_get(
1707 container,
1708 lambda x: x['thumbnail']['thumbnails'], list) or []):
1709 thumbnail_url = thumbnail.get('url')
1710 if not thumbnail_url:
bf1317d2 1711 continue
545cc85d 1712 thumbnails.append({
1713 'height': int_or_none(thumbnail.get('height')),
1714 'url': thumbnail_url,
1715 'width': int_or_none(thumbnail.get('width')),
1716 })
1717 if thumbnails:
1718 break
a6211d23 1719 else:
545cc85d 1720 thumbnail = search_meta(['og:image', 'twitter:image'])
1721 if thumbnail:
1722 thumbnails = [{'url': thumbnail}]
1723
1724 category = microformat.get('category') or search_meta('genre')
1725 channel_id = video_details.get('channelId') \
1726 or microformat.get('externalChannelId') \
1727 or search_meta('channelId')
1728 duration = int_or_none(
1729 video_details.get('lengthSeconds')
1730 or microformat.get('lengthSeconds')) \
1731 or parse_duration(search_meta('duration'))
1732 is_live = video_details.get('isLive')
1733 owner_profile_url = microformat.get('ownerProfileUrl')
1734
1735 info = {
1736 'id': video_id,
1737 'title': self._live_title(video_title) if is_live else video_title,
1738 'formats': formats,
1739 'thumbnails': thumbnails,
1740 'description': video_description,
1741 'upload_date': unified_strdate(
1742 microformat.get('uploadDate')
1743 or search_meta('uploadDate')),
1744 'uploader': video_details['author'],
1745 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1746 'uploader_url': owner_profile_url,
1747 'channel_id': channel_id,
1748 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1749 'duration': duration,
1750 'view_count': int_or_none(
1751 video_details.get('viewCount')
1752 or microformat.get('viewCount')
1753 or search_meta('interactionCount')),
1754 'average_rating': float_or_none(video_details.get('averageRating')),
1755 'age_limit': 18 if (
1756 microformat.get('isFamilySafe') is False
1757 or search_meta('isFamilyFriendly') == 'false'
1758 or search_meta('og:restrictions:age') == '18+') else 0,
1759 'webpage_url': webpage_url,
1760 'categories': [category] if category else None,
1761 'tags': keywords,
1762 'is_live': is_live,
1763 'playable_in_embed': playability_status.get('playableInEmbed'),
f76ede8e 1764 'was_live': video_details.get('isLiveContent')
545cc85d 1765 }
b477fc13 1766
545cc85d 1767 pctr = try_get(
1768 player_response,
1769 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1770 subtitles = {}
1771 if pctr:
def process_language(container, base_url, lang_code, query):
    """Store one subtitle entry per format for `lang_code` in `container`.

    For every format in self._SUBTITLE_FORMATS an {'ext', 'url'} dict is
    built, where the URL is `base_url` updated with `query` plus
    fmt=<format>. The resulting list is assigned to container[lang_code].
    """
    container[lang_code] = [{
        'ext': fmt,
        # Build a fresh query per format so the caller's dict is not mutated
        'url': update_url_query(base_url, dict(query, fmt=fmt)),
    } for fmt in self._SUBTITLE_FORMATS]
7e72694b 1783
545cc85d 1784 for caption_track in (pctr.get('captionTracks') or []):
1785 base_url = caption_track.get('baseUrl')
1786 if not base_url:
1787 continue
1788 if caption_track.get('kind') != 'asr':
1789 lang_code = caption_track.get('languageCode')
1790 if not lang_code:
1791 continue
1792 process_language(
1793 subtitles, base_url, lang_code, {})
1794 continue
1795 automatic_captions = {}
1796 for translation_language in (pctr.get('translationLanguages') or []):
1797 translation_language_code = translation_language.get('languageCode')
1798 if not translation_language_code:
1799 continue
1800 process_language(
1801 automatic_captions, base_url, translation_language_code,
1802 {'tlang': translation_language_code})
1803 info['automatic_captions'] = automatic_captions
1804 info['subtitles'] = subtitles
7e72694b 1805
545cc85d 1806 parsed_url = compat_urllib_parse_urlparse(url)
1807 for component in [parsed_url.fragment, parsed_url.query]:
1808 query = compat_parse_qs(component)
1809 for k, v in query.items():
1810 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1811 d_k += '_time'
1812 if d_k not in info and k in s_ks:
1813 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1814
1815 # Youtube Music Auto-generated description
822b9d9c 1816 if video_description:
38d70284 1817 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1818 if mobj:
822b9d9c
RA
1819 release_year = mobj.group('release_year')
1820 release_date = mobj.group('release_date')
1821 if release_date:
1822 release_date = release_date.replace('-', '')
1823 if not release_year:
545cc85d 1824 release_year = release_date[:4]
1825 info.update({
1826 'album': mobj.group('album'.strip()),
1827 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1828 'track': mobj.group('track').strip(),
1829 'release_date': release_date,
cc2db878 1830 'release_year': int_or_none(release_year),
545cc85d 1831 })
7e72694b 1832
545cc85d 1833 initial_data = None
1834 if webpage:
1835 initial_data = self._extract_yt_initial_variable(
1836 webpage, self._YT_INITIAL_DATA_RE, video_id,
1837 'yt initial data')
1838 if not initial_data:
1839 initial_data = self._call_api(
1840 'next', {'videoId': video_id}, video_id, fatal=False)
1841
1842 if not is_live:
1843 try:
1844 # This will error if there is no livechat
1845 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1846 info['subtitles']['live_chat'] = [{
1847 'video_id': video_id,
1848 'ext': 'json',
1849 'protocol': 'youtube_live_chat_replay',
1850 }]
1851 except (KeyError, IndexError, TypeError):
1852 pass
1853
1854 if initial_data:
1855 chapters = self._extract_chapters_from_json(
1856 initial_data, video_id, duration)
1857 if not chapters:
1858 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1859 contents = try_get(
1860 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1861 list)
1862 if not contents:
1863 continue
1864
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a macro markers list item.

    `mmlir` is a macroMarkersListItemRenderer dict. Returns None when it is
    missing or empty, so callers can skip the chapter.
    """
    if not mmlir:
        # The lookup of the following item may come back as None
        return None
    return parse_duration(
        get_text(mmlir.get('timeDescription')))
1868
1869 chapters = []
1870 for next_num, content in enumerate(contents, start=1):
1871 mmlir = content.get('macroMarkersListItemRenderer') or {}
1872 start_time = chapter_time(mmlir)
1873 end_time = chapter_time(try_get(
1874 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1875 if next_num < len(contents) else duration
1876 if start_time is None or end_time is None:
1877 continue
1878 chapters.append({
1879 'start_time': start_time,
1880 'end_time': end_time,
1881 'title': get_text(mmlir.get('title')),
1882 })
1883 if chapters:
1884 break
1885 if chapters:
1886 info['chapters'] = chapters
1887
1888 contents = try_get(
1889 initial_data,
1890 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1891 list) or []
1892 for content in contents:
1893 vpir = content.get('videoPrimaryInfoRenderer')
1894 if vpir:
1895 stl = vpir.get('superTitleLink')
1896 if stl:
1897 stl = get_text(stl)
1898 if try_get(
1899 vpir,
1900 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1901 info['location'] = stl
1902 else:
1903 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1904 if mobj:
1905 info.update({
1906 'series': mobj.group(1),
1907 'season_number': int(mobj.group(2)),
1908 'episode_number': int(mobj.group(3)),
1909 })
1910 for tlb in (try_get(
1911 vpir,
1912 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1913 list) or []):
1914 tbr = tlb.get('toggleButtonRenderer') or {}
1915 for getter, regex in [(
1916 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1917 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1918 lambda x: x['accessibility'],
1919 lambda x: x['accessibilityData']['accessibilityData'],
1920 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1921 label = (try_get(tbr, getter, dict) or {}).get('label')
1922 if label:
1923 mobj = re.match(regex, label)
1924 if mobj:
1925 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1926 break
1927 sbr_tooltip = try_get(
1928 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1929 if sbr_tooltip:
1930 like_count, dislike_count = sbr_tooltip.split(' / ')
1931 info.update({
1932 'like_count': str_to_int(like_count),
1933 'dislike_count': str_to_int(dislike_count),
1934 })
1935 vsir = content.get('videoSecondaryInfoRenderer')
1936 if vsir:
1937 info['channel'] = get_text(try_get(
1938 vsir,
1939 lambda x: x['owner']['videoOwnerRenderer']['title'],
1940 compat_str))
1941 rows = try_get(
1942 vsir,
1943 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1944 list) or []
1945 multiple_songs = False
1946 for row in rows:
1947 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1948 multiple_songs = True
1949 break
1950 for row in rows:
1951 mrr = row.get('metadataRowRenderer') or {}
1952 mrr_title = mrr.get('title')
1953 if not mrr_title:
1954 continue
1955 mrr_title = get_text(mrr['title'])
1956 mrr_contents_text = get_text(mrr['contents'][0])
1957 if mrr_title == 'License':
1958 info['license'] = mrr_contents_text
1959 elif not multiple_songs:
1960 if mrr_title == 'Album':
1961 info['album'] = mrr_contents_text
1962 elif mrr_title == 'Artist':
1963 info['artist'] = mrr_contents_text
1964 elif mrr_title == 'Song':
1965 info['track'] = mrr_contents_text
1966
1967 fallbacks = {
1968 'channel': 'uploader',
1969 'channel_id': 'uploader_id',
1970 'channel_url': 'uploader_url',
1971 }
1972 for to, frm in fallbacks.items():
1973 if not info.get(to):
1974 info[to] = info.get(frm)
1975
1976 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1977 v = info.get(s_k)
1978 if v:
1979 info[d_k] = v
b84071c0 1980
06167fbb 1981 # get xsrf for annotations or comments
1982 get_annotations = self._downloader.params.get('writeannotations', False)
1983 get_comments = self._downloader.params.get('getcomments', False)
1984 if get_annotations or get_comments:
29f7c58a 1985 xsrf_token = None
545cc85d 1986 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 1987 if ytcfg:
1988 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
1989 if not xsrf_token:
1990 xsrf_token = self._search_regex(
1991 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 1992 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 1993
1994 # annotations
06167fbb 1995 if get_annotations:
64b6a4e9
RA
1996 invideo_url = try_get(
1997 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
1998 if xsrf_token and invideo_url:
29f7c58a 1999 xsrf_field_name = None
2000 if ytcfg:
2001 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2002 if not xsrf_field_name:
2003 xsrf_field_name = self._search_regex(
2004 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2005 webpage, 'xsrf field name',
29f7c58a 2006 group='xsrf_field_name', default='session_token')
8a784c74 2007 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2008 self._proto_relative_url(invideo_url),
2009 video_id, note='Downloading annotations',
2010 errnote='Unable to download video annotations', fatal=False,
2011 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2012
06167fbb 2013 # Get comments
2014 # TODO: Refactor and move to separate function
277d6ff5 2015 def extract_comments():
06167fbb 2016 expected_video_comment_count = 0
2017 video_comments = []
277d6ff5 2018 comment_xsrf = xsrf_token
06167fbb 2019
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text that follows `key` in `html`, up to `separator`.

    The value starts `num_chars` characters after the end of the first
    occurrence of `key` and ends right before the next `separator`.
    Returns None when `key` does not occur; when no `separator` follows,
    the remainder of `html` is returned.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find() returns -1 for a miss, which would otherwise produce
        # an arbitrary slice from the start of the document.
        return None
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # Do not let -1 silently chop off the last character
        return html[pos_begin:]
    return html[pos_begin:pos_end]
2024
def search_dict(partial, key):
    """Lazily yield every value stored under `key` in a nested structure.

    Dicts and lists are walked depth-first in iteration order. A value
    found under `key` is yielded as-is and not searched any further;
    anything that is neither a dict nor a list yields nothing.
    """
    if isinstance(partial, list):
        for element in partial:
            for found in search_dict(element, key):
                yield found
    elif isinstance(partial, dict):
        for name, value in partial.items():
            if name == key:
                yield value
                continue
            for found in search_dict(value, key):
                yield found
2037
8a784c74 2038 continuations = []
2039 if initial_data:
2040 try:
2041 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2042 continuations = [ncd['continuation']]
2043 # Handle videos where comments have been disabled entirely
2044 except StopIteration:
2045 pass
06167fbb 2046
def get_continuation(continuation, session_token, replies=False):
    """Request one page of comments (or comment replies) for the video.

    POSTs `session_token` to the comment_service_ajax endpoint with
    `continuation` as the ctoken; `replies` selects the
    action_get_comment_replies action instead of action_get_comments.

    Returns the parsed JSON body on HTTP 200 and None on HTTP 413.
    Raises ExtractorError for any other status code that reaches here.
    """
    query = {
        'pbj': 1,
        'ctoken': continuation,
    }
    if replies:
        query['action_get_comment_replies'] = 1
    else:
        query['action_get_comments'] = 1

    # A single request is made: every outcome below returns or raises,
    # so no retry loop is needed.
    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=urlencode_postdata({
            'session_token': session_token
        }),
        query=query,
        headers={
            'Accept': '*/*',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': '2.20201202.06.01'
        }
    )

    response_code = handle.getcode()
    if response_code == 200:
        return self._parse_json(content, video_id)
    if response_code == 413:
        return None
    raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2081
2082 first_continuation = True
885d36d4 2083 chain_msg = ''
2084 self.to_screen('Downloading comments')
06167fbb 2085 while continuations:
885d36d4 2086 continuation = continuations.pop()
277d6ff5 2087 comment_response = get_continuation(continuation, comment_xsrf)
06167fbb 2088 if not comment_response:
2089 continue
2090 if list(search_dict(comment_response, 'externalErrorMessage')):
2091 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2092
8d0ea5f9
B
2093 if 'continuationContents' not in comment_response['response']:
2094 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2095 continue
2096 # not sure if this actually helps
2097 if 'xsrf_token' in comment_response:
277d6ff5 2098 comment_xsrf = comment_response['xsrf_token']
8d0ea5f9 2099
06167fbb 2100 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2101 if first_continuation:
2102 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2103 first_continuation = False
2104 if 'contents' not in item_section:
2105 # continuation returned no comments?
2106 # set an empty array as to not break the for loop
2107 item_section['contents'] = []
2108
2109 for meta_comment in item_section['contents']:
2110 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2111 video_comments.append({
2112 'id': comment['commentId'],
ba7bf12d 2113 'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
8d0ea5f9 2114 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2115 'author': comment.get('authorText', {}).get('simpleText', ''),
2116 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2117 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2118 'parent': 'root'
2119 })
2120 if 'replies' not in meta_comment['commentThreadRenderer']:
2121 continue
2122
8d0ea5f9
B
2123 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2124 while reply_continuations:
06167fbb 2125 time.sleep(1)
8d0ea5f9 2126 continuation = reply_continuations.pop()
277d6ff5 2127 replies_data = get_continuation(continuation, comment_xsrf, True)
06167fbb 2128 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2129 continue
06167fbb 2130
2131 if self._downloader.params.get('verbose', False):
885d36d4 2132 chain_msg = ' (chain %s)' % comment['commentId']
2133 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2134 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2135 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2136 reply_comment = reply_meta['commentRenderer']
2137 video_comments.append({
2138 'id': reply_comment['commentId'],
2139 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2140 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2141 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2142 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2143 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2144 'parent': comment['commentId']
2145 })
2146 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2147 continue
8d0ea5f9 2148 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2149
885d36d4 2150 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2151 if 'continuations' in item_section:
8d0ea5f9 2152 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2153 time.sleep(1)
2154
885d36d4 2155 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
277d6ff5 2156 return {
545cc85d 2157 'comments': video_comments,
2158 'comment_count': expected_video_comment_count
277d6ff5 2159 }
2160
2161 if get_comments:
2162 info['__post_extractor'] = extract_comments
4ea3be0a 2163
545cc85d 2164 self.mark_watched(video_id, player_response)
d77ab8e2 2165
545cc85d 2166 return info
c5e8d7af 2167
5f6a1245 2168
8bdd16b4 2169class YoutubeTabIE(YoutubeBaseInfoExtractor):
2170 IE_DESC = 'YouTube.com tab'
70d5c17b 2171 _VALID_URL = r'''(?x)
2172 https?://
2173 (?:\w+\.)?
2174 (?:
2175 youtube(?:kids)?\.com|
2176 invidio\.us
2177 )/
2178 (?:
2179 (?:channel|c|user)/|
2180 (?P<not_channel>
9ba5705a 2181 feed/|hashtag/|
70d5c17b 2182 (?:playlist|watch)\?.*?\blist=
2183 )|
29f7c58a 2184 (?!(?:%s)\b) # Direct URLs
70d5c17b 2185 )
2186 (?P<id>[^/?\#&]+)
2187 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2188 IE_NAME = 'youtube:tab'
2189
81127aa5 2190 _TESTS = [{
8bdd16b4 2191 # playlists, multipage
2192 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2193 'playlist_mincount': 94,
2194 'info_dict': {
2195 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2196 'title': 'Игорь Клейнер - Playlists',
2197 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2198 'uploader': 'Игорь Клейнер',
2199 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2200 },
2201 }, {
2202 # playlists, multipage, different order
2203 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2204 'playlist_mincount': 94,
2205 'info_dict': {
2206 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2207 'title': 'Игорь Клейнер - Playlists',
2208 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2209 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2210 'uploader': 'Игорь Клейнер',
8bdd16b4 2211 },
2212 }, {
2213 # playlists, singlepage
2214 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2215 'playlist_mincount': 4,
2216 'info_dict': {
2217 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2218 'title': 'ThirstForScience - Playlists',
2219 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2220 'uploader': 'ThirstForScience',
2221 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2222 }
2223 }, {
2224 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2225 'only_matching': True,
2226 }, {
2227 # basic, single video playlist
0e30a7b9 2228 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2229 'info_dict': {
0e30a7b9 2230 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2231 'uploader': 'Sergey M.',
2232 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2233 'title': 'youtube-dl public playlist',
81127aa5 2234 },
0e30a7b9 2235 'playlist_count': 1,
9291475f 2236 }, {
8bdd16b4 2237 # empty playlist
0e30a7b9 2238 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2239 'info_dict': {
0e30a7b9 2240 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2241 'uploader': 'Sergey M.',
2242 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2243 'title': 'youtube-dl empty playlist',
9291475f
PH
2244 },
2245 'playlist_count': 0,
2246 }, {
8bdd16b4 2247 # Home tab
2248 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2249 'info_dict': {
8bdd16b4 2250 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2251 'title': 'lex will - Home',
2252 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2253 'uploader': 'lex will',
2254 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2255 },
8bdd16b4 2256 'playlist_mincount': 2,
9291475f 2257 }, {
8bdd16b4 2258 # Videos tab
2259 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2260 'info_dict': {
8bdd16b4 2261 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2262 'title': 'lex will - Videos',
2263 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2264 'uploader': 'lex will',
2265 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2266 },
8bdd16b4 2267 'playlist_mincount': 975,
9291475f 2268 }, {
8bdd16b4 2269 # Videos tab, sorted by popular
2270 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2271 'info_dict': {
8bdd16b4 2272 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2273 'title': 'lex will - Videos',
2274 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2275 'uploader': 'lex will',
2276 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2277 },
8bdd16b4 2278 'playlist_mincount': 199,
9291475f 2279 }, {
8bdd16b4 2280 # Playlists tab
2281 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2282 'info_dict': {
8bdd16b4 2283 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2284 'title': 'lex will - Playlists',
2285 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2286 'uploader': 'lex will',
2287 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2288 },
8bdd16b4 2289 'playlist_mincount': 17,
ac7553d0 2290 }, {
8bdd16b4 2291 # Community tab
2292 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2293 'info_dict': {
8bdd16b4 2294 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2295 'title': 'lex will - Community',
2296 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2297 'uploader': 'lex will',
2298 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2299 },
2300 'playlist_mincount': 18,
87dadd45 2301 }, {
8bdd16b4 2302 # Channels tab
2303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2304 'info_dict': {
8bdd16b4 2305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2306 'title': 'lex will - Channels',
2307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2308 'uploader': 'lex will',
2309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2310 },
deaec5af 2311 'playlist_mincount': 12,
6b08cdf6 2312 }, {
a0566bbf 2313 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2314 'only_matching': True,
2315 }, {
a0566bbf 2316 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2317 'only_matching': True,
2318 }, {
a0566bbf 2319 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2320 'only_matching': True,
2321 }, {
2322 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2323 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2324 'info_dict': {
2325 'title': '29C3: Not my department',
2326 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2327 'uploader': 'Christiaan008',
2328 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2329 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2330 },
2331 'playlist_count': 96,
2332 }, {
2333 'note': 'Large playlist',
2334 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2335 'info_dict': {
8bdd16b4 2336 'title': 'Uploads from Cauchemar',
2337 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2338 'uploader': 'Cauchemar',
2339 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2340 },
8bdd16b4 2341 'playlist_mincount': 1123,
2342 }, {
2343 # even larger playlist, 8832 videos
2344 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2345 'only_matching': True,
4b7df0d3
JMF
2346 }, {
2347 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2348 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2349 'info_dict': {
acf757f4
PH
2350 'title': 'Uploads from Interstellar Movie',
2351 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2352 'uploader': 'Interstellar Movie',
8bdd16b4 2353 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2354 },
481cc733 2355 'playlist_mincount': 21,
8bdd16b4 2356 }, {
2357 # https://github.com/ytdl-org/youtube-dl/issues/21844
2358 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2359 'info_dict': {
2360 'title': 'Data Analysis with Dr Mike Pound',
2361 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2362 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2363 'uploader': 'Computerphile',
deaec5af 2364 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2365 },
2366 'playlist_mincount': 11,
2367 }, {
a0566bbf 2368 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2369 'only_matching': True,
dacb3a86
S
2370 }, {
2371 # Playlist URL that does not actually serve a playlist
2372 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2373 'info_dict': {
2374 'id': 'FqZTN594JQw',
2375 'ext': 'webm',
2376 'title': "Smiley's People 01 detective, Adventure Series, Action",
2377 'uploader': 'STREEM',
2378 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2379 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2380 'upload_date': '20150526',
2381 'license': 'Standard YouTube License',
2382 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2383 'categories': ['People & Blogs'],
2384 'tags': list,
dbdaaa23 2385 'view_count': int,
dacb3a86
S
2386 'like_count': int,
2387 'dislike_count': int,
2388 },
2389 'params': {
2390 'skip_download': True,
2391 },
13a75688 2392 'skip': 'This video is not available.',
dacb3a86 2393 'add_ie': [YoutubeIE.ie_key()],
481cc733 2394 }, {
8bdd16b4 2395 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2396 'only_matching': True,
66b48727 2397 }, {
8bdd16b4 2398 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2399 'only_matching': True,
a0566bbf 2400 }, {
2401 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2402 'info_dict': {
2403 'id': '9Auq9mYxFEE',
2404 'ext': 'mp4',
deaec5af 2405 'title': compat_str,
a0566bbf 2406 'uploader': 'Sky News',
2407 'uploader_id': 'skynews',
2408 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2409 'upload_date': '20191102',
deaec5af 2410 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2411 'categories': ['News & Politics'],
2412 'tags': list,
2413 'like_count': int,
2414 'dislike_count': int,
2415 },
2416 'params': {
2417 'skip_download': True,
2418 },
2419 }, {
2420 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2421 'info_dict': {
2422 'id': 'a48o2S1cPoo',
2423 'ext': 'mp4',
2424 'title': 'The Young Turks - Live Main Show',
2425 'uploader': 'The Young Turks',
2426 'uploader_id': 'TheYoungTurks',
2427 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2428 'upload_date': '20150715',
2429 'license': 'Standard YouTube License',
2430 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2431 'categories': ['News & Politics'],
2432 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2433 'like_count': int,
2434 'dislike_count': int,
2435 },
2436 'params': {
2437 'skip_download': True,
2438 },
2439 'only_matching': True,
2440 }, {
2441 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2442 'only_matching': True,
2443 }, {
2444 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2445 'only_matching': True,
3d3dddc9 2446 }, {
2447 'url': 'https://www.youtube.com/feed/trending',
2448 'only_matching': True,
2449 }, {
2450 # needs auth
2451 'url': 'https://www.youtube.com/feed/library',
2452 'only_matching': True,
2453 }, {
2454 # needs auth
2455 'url': 'https://www.youtube.com/feed/history',
2456 'only_matching': True,
2457 }, {
2458 # needs auth
2459 'url': 'https://www.youtube.com/feed/subscriptions',
2460 'only_matching': True,
2461 }, {
2462 # needs auth
2463 'url': 'https://www.youtube.com/feed/watch_later',
2464 'only_matching': True,
2465 }, {
2466 # no longer available?
2467 'url': 'https://www.youtube.com/feed/recommended',
2468 'only_matching': True,
29f7c58a 2469 }, {
2470 # inline playlist with not always working continuations
2471 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2472 'only_matching': True,
2473 }, {
2474 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2475 'only_matching': True,
2476 }, {
2477 'url': 'https://www.youtube.com/course',
2478 'only_matching': True,
2479 }, {
2480 'url': 'https://www.youtube.com/zsecurity',
2481 'only_matching': True,
2482 }, {
2483 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2484 'only_matching': True,
2485 }, {
2486 'url': 'https://www.youtube.com/TheYoungTurks/live',
2487 'only_matching': True,
2488 }]
2489
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Anything YoutubeIE already accepts is rejected here; every other URL is
    decided by the inherited matcher.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2494
2495 def _extract_channel_id(self, webpage):
2496 channel_id = self._html_search_meta(
2497 'channelId', webpage, 'channel id', default=None)
2498 if channel_id:
2499 return channel_id
2500 channel_url = self._html_search_meta(
2501 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2502 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2503 'twitter:app:url:googleplay'), webpage, 'channel url')
2504 return self._search_regex(
2505 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2506 channel_url, 'channel id')
15f6397c 2507
8bdd16b4 2508 @staticmethod
2509 def _extract_grid_item_renderer(item):
2510 for item_kind in ('Playlist', 'Video', 'Channel'):
2511 renderer = item.get('grid%sRenderer' % item_kind)
2512 if renderer:
2513 return renderer
2514
def _grid_entries(self, grid_renderer):
    """Yield entries for the items listed in ``grid_renderer['items']``.

    Each usable item renderer is checked for a playlist id, a video id and
    a channel id in turn; every id that is present produces one entry.
    Items or renderers that are not dicts are ignored.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        label = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # playlist
        list_id = item_renderer.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=label)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel (its title is stored as plain text rather than runs)
        owner_id = item_renderer.get('channelId')
        if owner_id:
            label = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=label)
2543
3d3dddc9 2544 def _shelf_entries_from_content(self, shelf_renderer):
2545 content = shelf_renderer.get('content')
2546 if not isinstance(content, dict):
8bdd16b4 2547 return
3d3dddc9 2548 renderer = content.get('gridRenderer')
2549 if renderer:
2550 # TODO: add support for nested playlists so each shelf is processed
2551 # as separate playlist
2552 # TODO: this includes only first N items
2553 for entry in self._grid_entries(renderer):
2554 yield entry
2555 renderer = content.get('horizontalListRenderer')
2556 if renderer:
2557 # TODO
2558 pass
8bdd16b4 2559
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf endpoint carries a URL, a URL result for it is yielded
    first.  The shelf's embedded content is then yielded as well.  With
    *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by their URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType for
        # WEB_PAGE_TYPE_CHANNEL will not work.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # A shelf may lack a URL, so its content is always walked too
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2577
8bdd16b4 2578 def _playlist_entries(self, video_list_renderer):
2579 for content in video_list_renderer['contents']:
2580 if not isinstance(content, dict):
2581 continue
2582 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2583 if not isinstance(renderer, dict):
2584 continue
2585 video_id = renderer.get('videoId')
2586 if not video_id:
2587 continue
2588 yield self._extract_video(renderer)
07aeced6 2589
3d3dddc9 2590 r""" # Not needed in the new implementation
3462ffa8 2591 def _itemSection_entries(self, item_sect_renderer):
2592 for content in item_sect_renderer['contents']:
2593 if not isinstance(content, dict):
2594 continue
2595 renderer = content.get('videoRenderer', {})
2596 if not isinstance(renderer, dict):
2597 continue
2598 video_id = renderer.get('videoId')
2599 if not video_id:
2600 continue
2601 yield self._extract_video(renderer)
3d3dddc9 2602 """
3462ffa8 2603
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has an id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2611
8bdd16b4 2612 def _video_entry(self, video_renderer):
2613 video_id = video_renderer.get('videoId')
2614 if video_id:
2615 return self._extract_video(video_renderer)
dacb3a86 2616
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by one community post.

    The attached video (if any) is yielded first, followed by a URL result
    for every link in the post text that YoutubeIE can handle.  A link that
    points at the attached video is not yielded again.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment's id so the duplicate check below can work;
    # previously this stayed None and the check never matched.
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the id of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2645
8bdd16b4 2646 def _post_thread_continuation_entries(self, post_thread_continuation):
2647 contents = post_thread_continuation.get('contents')
2648 if not isinstance(contents, list):
2649 return
2650 for content in contents:
2651 renderer = content.get('backstagePostThreadRenderer')
2652 if not isinstance(renderer, dict):
2653 continue
2654 for entry in self._post_thread_entries(renderer):
2655 yield entry
07aeced6 2656
29f7c58a 2657 @staticmethod
2658 def _build_continuation_query(continuation, ctp=None):
2659 query = {
2660 'ctoken': continuation,
2661 'continuation': continuation,
2662 }
2663 if ctp:
2664 query['itct'] = ctp
2665 return query
2666
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``continuations[0].nextContinuationData``.

    ``None`` is returned when that dict or its ``continuation`` token is
    missing or empty.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
    return None
c5e8d7af 2678
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or ``None``.

    ``nextContinuationData`` is tried first.  Failing that, the renderer's
    ``contents`` and then ``items`` lists are scanned for the first
    ``continuationItemRenderer`` that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2701
def _entries(self, tab, identity_token):
    """Yield every entry of *tab*, following continuation pages.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first.  While a continuation query is
    available, further pages are requested from ``browse_ajax`` and their
    entries are yielded too.

    :param tab: the selected tab renderer dict.
    :param identity_token: sent as ``x-youtube-identity-token`` when truthy.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        """Yield entries below *parent_renderer*; store the continuation found in ``continuation_list[0]``."""
        # Dispatch table: renderer key -> callable producing its entries
        section_renderers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                for key, renderer in isr_content.items():
                    if key not in section_renderers:
                        continue
                    for entry in section_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of an item is processed
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries, which writes to it;
    # Python 2 does not support nonlocal.
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    # Dispatch tables for the two continuation response layouts.  They do
    # not change between pages, so they are built once.
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # first item's key -> (handler, key the handler reads its item list from)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (self._playlist_entries, 'contents'),
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Fall back to an empty dict so the checks below cannot
                # fail with AttributeError when the payload lacks a
                # 'response' dict.
                response = try_get(browse, lambda x: x[1]['response'], dict) or {}

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if response.get('continuationContents') or response.get('onResponseReceivedActions'):
                    break
                last_error = 'Incomplete data recieved'
        if not browse or not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot for extract_entries to report the next continuation
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list under the key the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this page: stop paging
        break
9558dcec 2841
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for candidate in tabs:
        if try_get(candidate, lambda x: x['tabRenderer']['selected'], bool):
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2849
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is ``None`` are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        base_url = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        # A later owner overwrites the values of an earlier one
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'],
            compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict(
        (field, value) for field, value in found.items() if value is not None)
8bdd16b4 2872
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present and from
    ``playlistMetadataRenderer`` otherwise.  The entries are those of the
    selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Stays None for playlist pages; item_id is substituted below
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: use the playlist sidebar's uploader fields
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']
    return self.playlist_result(
        self._entries(selected_tab, identity_token), **metadata)
73c4ac2c 2939
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for a playlist rendered inline on a watch page.

    When the playlist has its own URL that differs from *url*, a URL result
    for that page is returned.  Otherwise the inline entries are used.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Inline playlist rendition continuation does not always work at
    # Youtube side, so regular tab-based playlist URL processing is
    # preferred whenever possible.
    endpoint_path = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 2957
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs found under ``data['alerts']``.

    For each alert that has a ``type``, its ``text.simpleText`` is yielded
    if present, followed by the text of every entry in ``text.runs``.
    Malformed (non-dict) entries are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value would otherwise fail on .get() below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
2975
def _extract_identity_token(self, webpage, item_id):
    """Return the identity token of *webpage*, or ``None``.

    ``ID_TOKEN`` from the extracted ytcfg is preferred; a regex search of
    the raw page is the fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
2985
def _real_extract(self, url):
    """Extract a tab, playlist or video result from *url*.

    The page is downloaded, and re-downloaded up to ``extractor_retries``
    times while its initial data lacks both ``contents`` and
    ``currentVideoEndpoint``.  The result is then built from, in order of
    preference: the page's tabs, an inline playlist, or a single video.

    Raises ExtractorError when YouTube reports an error alert or when the
    page cannot be recognized.
    """
    item_id = self._match_id(url)
    # Always query the canonical host
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # NOTE(review): relies on a 'not_channel' named group in _VALID_URL,
    # which is defined outside this method
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    retries = self._downloader.params.get('extractor_retries', 3)
    count = -1
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('Incomplete yt initial data recieved. Retrying ...')
        # The parentheses matter: without them the conditional expression
        # applies to the whole formatted string and the first attempt is
        # reported with an empty note.
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                # Only the last error is raised; earlier ones become warnings
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3058
c5e8d7af 3059
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and playlist URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject anything YoutubeTabIE accepts; otherwise use the inherited matcher."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a URL result for the ``/playlist`` page of the matched id.

        The query string of *url* is carried over; when there is none, only
        ``list=<id>`` is sent.
        """
        playlist_id = self._match_id(url)
        query = (
            compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            or {'list': playlist_id})
        target = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3134
3135
class YoutubeYtBeIE(InfoExtractor):
    """Turn ``youtu.be/<video>?list=<playlist>`` links into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result for the equivalent ``/watch?v=...&list=...`` page."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3174
3175
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` keyword to a youtube.com/user/<name> URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3189
b05654f0 3190
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` shortcut to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; ``url`` itself is not inspected."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3208
3209
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backed by the ``youtubei/v1/search`` JSON endpoint."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # No real upper bound seems to exist: a search for 'python', for
    # example, reports more than 8.000.000 results.
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` extracted videos for ``query``, page by page.

        Paging stops when a page cannot be downloaded, when a page has no
        recognisable result list, when no continuation token is found, or
        once ``n`` videos have been yielded.
        """
        request_body = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            request_body['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(request_body).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not response:
                return

            # The first getter matches an initial response, the second a
            # continuation response.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Promoted content may be mixed into the results, shifting the
            # positions of both the videos and the continuation token, so
            # every section is inspected rather than fixed indices.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video entry carrying an id.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request_body['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist result holding up to ``n`` entries for ``query``."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3290
c9ae7b95 3291
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends a fixed ``params`` value."""
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3297
c9ae7b95 3298
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and ``sp`` parameter from ``url`` and run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        # A missing ``sp`` parameter becomes an empty string.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3324
3325
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in during initialisation."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed page for ``_FEED_NAME`` to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3346
3347
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shortcut to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; ``url`` itself is not inspected."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3360
3361
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com front page as well as the ``:ytrec`` and
    ``:ytrecommended`` shortcuts.
    """
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3376
1ed5b5c9 3377
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` shortcut)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3389
1ed5b5c9 3390
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythistory`` shortcut)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3399
3400
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Extraction always fails with an expected error suggesting that the URL
    was cut off at an unquoted ``&``.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3448
3449
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is only 1 to 10 characters long.

    Extraction always fails with an expected error naming the partial id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError about the incomplete id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)