]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[YouTube] Show premium state in `availability` (#209)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c224251a 27 bool_or_none,
c5e8d7af 28 clean_html,
26fe8ffe 29 dict_get,
c5e8d7af 30 ExtractorError,
b60419c5 31 format_field,
2d30521a 32 float_or_none,
dd27fd17 33 int_or_none,
94278f72 34 mimetype2ext,
6310acf5 35 parse_codecs,
7c80519c 36 parse_duration,
dca3ff4a 37 qualities,
3995d37d 38 remove_start,
cf7e015f 39 smuggle_url,
dbdaaa23 40 str_or_none,
c93d53f5 41 str_to_int,
556dbe7f 42 try_get,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
8bdd16b4 46 update_url_query,
21c340b8 47 url_or_none,
6e6bc8da 48 urlencode_postdata,
8bdd16b4 49 urljoin,
c5e8d7af
PH
50)
51
5f6a1245 52
de7f3446 53class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
54 """Provide base functions for Youtube extractors"""
# Sign-in page; `_login` downloads it first and reuses its hidden form inputs
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced by any code visible in this part of the file
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoints `_login` posts to, in this order: account lookup, password
# challenge, TFA code submission ({0} is filled with the extracted TL value)
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of YouTube URL path names.
# NOTE(review): presumably reserved so they are not mistaken for channel
# names by URL patterns defined elsewhere - TODO confirm against its users
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# NOTE(review): presumably the machine name looked up in .netrc - confirm in the base class
_NETRC_MACHINE = 'youtube'
# If True, `_login` raises an error when neither login info nor a cookiefile is provided
_LOGIN_REQUIRED = False

# Regex for playlist IDs; interpolated into YoutubeIE._VALID_URL as `playlist_id`
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 72
25f14e9f
S
73 def _ids_to_results(self, ids):
74 return [
75 self.url_result(vid_id, 'Youtube', video_id=vid_id)
76 for vid_id in ids]
77
b2e8bc1b 78 def _login(self):
83317f69 79 """
80 Attempt to log in to YouTube.
81 True is returned if successful or skipped.
82 False is returned if login failed.
83
84 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
85 """
68217024 86 username, password = self._get_login_info()
b2e8bc1b
JMF
87 # No authentication to be performed
88 if username is None:
70d35d16 89 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
69ea8ca4 90 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
545cc85d 91 # if self._downloader.params.get('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
92 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 93 return True
b2e8bc1b 94
7cc3570e
PH
95 login_page = self._download_webpage(
96 self._LOGIN_URL, None,
69ea8ca4
PH
97 note='Downloading login page',
98 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
99 if login_page is False:
100 return
b2e8bc1b 101
1212e997 102 login_form = self._hidden_inputs(login_page)
c5e8d7af 103
e00eb564
S
104 def req(url, f_req, note, errnote):
105 data = login_form.copy()
106 data.update({
107 'pstMsg': 1,
108 'checkConnection': 'youtube',
109 'checkedDomains': 'youtube',
110 'hl': 'en',
111 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 112 'f.req': json.dumps(f_req),
e00eb564
S
113 'flowName': 'GlifWebSignIn',
114 'flowEntry': 'ServiceLogin',
baf67a60
S
115 # TODO: reverse actual botguard identifier generation algo
116 'bgRequest': '["identifier",""]',
041bc3ad 117 })
e00eb564
S
118 return self._download_json(
119 url, None, note=note, errnote=errnote,
120 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
121 fatal=False,
122 data=urlencode_postdata(data), headers={
123 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
124 'Google-Accounts-XSRF': 1,
125 })
126
3995d37d
S
127 def warn(message):
128 self._downloader.report_warning(message)
129
130 lookup_req = [
131 username,
132 None, [], None, 'US', None, None, 2, False, True,
133 [
134 None, None,
135 [2, 1, None, 1,
136 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
137 None, [], 4],
138 1, [None, None, []], None, None, None, True
139 ],
140 username,
141 ]
142
e00eb564 143 lookup_results = req(
3995d37d 144 self._LOOKUP_URL, lookup_req,
e00eb564
S
145 'Looking up account info', 'Unable to look up account info')
146
147 if lookup_results is False:
148 return False
041bc3ad 149
3995d37d
S
150 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
151 if not user_hash:
152 warn('Unable to extract user hash')
153 return False
154
155 challenge_req = [
156 user_hash,
157 None, 1, None, [1, None, None, None, [password, None, True]],
158 [
159 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
160 1, [None, None, []], None, None, None, True
161 ]]
83317f69 162
3995d37d
S
163 challenge_results = req(
164 self._CHALLENGE_URL, challenge_req,
165 'Logging in', 'Unable to log in')
83317f69 166
3995d37d 167 if challenge_results is False:
e00eb564 168 return
83317f69 169
3995d37d
S
170 login_res = try_get(challenge_results, lambda x: x[0][5], list)
171 if login_res:
172 login_msg = try_get(login_res, lambda x: x[5], compat_str)
173 warn(
174 'Unable to login: %s' % 'Invalid password'
175 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
176 return False
177
178 res = try_get(challenge_results, lambda x: x[0][-1], list)
179 if not res:
180 warn('Unable to extract result entry')
181 return False
182
9a6628aa
S
183 login_challenge = try_get(res, lambda x: x[0][0], list)
184 if login_challenge:
185 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
186 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
187 # SEND_SUCCESS - TFA code has been successfully sent to phone
188 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 189 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
190 if status == 'QUOTA_EXCEEDED':
191 warn('Exceeded the limit of TFA codes, try later')
192 return False
193
194 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
195 if not tl:
196 warn('Unable to extract TL')
197 return False
198
199 tfa_code = self._get_tfa_info('2-step verification code')
200
201 if not tfa_code:
202 warn(
203 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
204 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205 return False
206
207 tfa_code = remove_start(tfa_code, 'G-')
208
209 tfa_req = [
210 user_hash, None, 2, None,
211 [
212 9, None, None, None, None, None, None, None,
213 [None, tfa_code, True, 2]
214 ]]
215
216 tfa_results = req(
217 self._TFA_URL.format(tl), tfa_req,
218 'Submitting TFA code', 'Unable to submit TFA code')
219
220 if tfa_results is False:
221 return False
222
223 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
224 if tfa_res:
225 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
226 warn(
227 'Unable to finish TFA: %s' % 'Invalid TFA code'
228 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
229 return False
230
231 check_cookie_url = try_get(
232 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
233 else:
234 CHALLENGES = {
235 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
236 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
237 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
238 }
239 challenge = CHALLENGES.get(
240 challenge_str,
241 '%s returned error %s.' % (self.IE_NAME, challenge_str))
242 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
243 return False
3995d37d
S
244 else:
245 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
246
247 if not check_cookie_url:
248 warn('Unable to extract CheckCookie URL')
249 return False
e00eb564
S
250
251 check_cookie_results = self._download_webpage(
3995d37d
S
252 check_cookie_url, None, 'Checking cookie', fatal=False)
253
254 if check_cookie_results is False:
255 return False
e00eb564 256
3995d37d
S
257 if 'https://myaccount.google.com/' not in check_cookie_results:
258 warn('Unable to log in')
b2e8bc1b 259 return False
e00eb564 260
b2e8bc1b
JMF
261 return True
262
cce889b9 263 def _initialize_consent(self):
264 cookies = self._get_cookies('https://www.youtube.com/')
265 if cookies.get('__Secure-3PSID'):
266 return
267 consent_id = None
268 consent = cookies.get('CONSENT')
269 if consent:
270 if 'YES' in consent.value:
271 return
272 consent_id = self._search_regex(
273 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
274 if not consent_id:
275 consent_id = random.randint(100, 999)
276 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 277
b2e8bc1b 278 def _real_initialize(self):
cce889b9 279 self._initialize_consent()
b2e8bc1b
JMF
280 if self._downloader is None:
281 return
b2e8bc1b
JMF
282 if not self._login():
283 return
c5e8d7af 284
# Web client version string, shared by the API request body and the basic headers below
_YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
# Base JSON payload; `_call_api` copies it and merges the caller's query on top
_DEFAULT_API_DATA = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': _YT_WEB_CLIENT_VERSION,
        }
    },
}

# NOTE(review): not used by the code visible in this part of the file;
# presumably sent with requests made outside `_call_api` - confirm
_DEFAULT_BASIC_API_HEADERS = {
    'X-YouTube-Client-Name': '1',
    'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
}

# Captures the JSON object assigned to ytInitialData (plain or window[...] form)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; `_extract_yt_initial_data` appends
# it to the data regex so the non-greedy capture ends at a known boundary
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 303
a5c56234
M
304 def _generate_sapisidhash_header(self):
305 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
306 if sapisid_cookie is None:
307 return
308 time_now = round(time.time())
309 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
310 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
311
312 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
313 note='Downloading API JSON', errnote='Unable to download API page'):
8bdd16b4 314 data = self._DEFAULT_API_DATA.copy()
315 data.update(query)
a5c56234
M
316 headers = headers or {}
317 headers.update({'content-type': 'application/json'})
318 auth = self._generate_sapisidhash_header()
319 if auth is not None:
320 headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
545cc85d 321 return self._download_json(
a5c56234
M
322 'https://www.youtube.com/youtubei/v1/%s' % ep,
323 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
324 data=json.dumps(data).encode('utf8'), headers=headers,
8bdd16b4 325 query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 326
8bdd16b4 327 def _extract_yt_initial_data(self, video_id, webpage):
328 return self._parse_json(
329 self._search_regex(
29f7c58a 330 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 331 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 332 video_id)
0c148415 333
a1c5d2ca
M
334 def _extract_identity_token(self, webpage, item_id):
335 ytcfg = self._extract_ytcfg(item_id, webpage)
336 if ytcfg:
337 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
338 if token:
339 return token
340 return self._search_regex(
341 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
342 'identity token', default=None)
343
@staticmethod
def _extract_account_syncid(data):
    """Extract syncId required to download private playlists of secondary channels"""
    datasync_id = try_get(
        data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
    parts = (datasync_id or '').split("||")
    # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
    # and just "user_syncid||" for primary channel. We only want the channel_syncid
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[0]
354
29f7c58a 355 def _extract_ytcfg(self, video_id, webpage):
356 return self._parse_json(
357 self._search_regex(
358 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
359 default='{}'), video_id, fatal=False)
360
def _extract_video(self, renderer):
    """Turn a video renderer dict into a url_transparent result for YoutubeIE.

    Fields that cannot be read from *renderer* are left as None.
    """
    def text_at(getter):
        # String reached by *getter* (or the first of several getters), else None
        return try_get(renderer, getter, compat_str)

    video_id = renderer.get('videoId')
    title = text_at((
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText']))
    description = text_at(lambda x: x['descriptionSnippet']['runs'][0]['text'])
    duration = parse_duration(text_at(lambda x: x['lengthText']['simpleText']))

    # Whitespace is removed before matching the leading digit/comma group
    raw_views = text_at(lambda x: x['viewCountText']['simpleText']) or ''
    compact_views = re.sub(r'\s', '', raw_views)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = text_at((
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text']))

    return {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
392
0c148415 393
360e1ca5 394class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 395 IE_DESC = 'YouTube.com'
bc2ca1bb 396 _INVIDIOUS_SITES = (
397 # invidious-redirect websites
398 r'(?:www\.)?redirect\.invidious\.io',
399 r'(?:(?:www|dev)\.)?invidio\.us',
400 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
401 r'(?:www\.)?invidious\.pussthecat\.org',
402 r'(?:www\.)?invidious\.048596\.xyz',
403 r'(?:www\.)?invidious\.zee\.li',
404 r'(?:www\.)?vid\.puffyan\.us',
405 r'(?:(?:www|au)\.)?ytprivate\.com',
406 r'(?:www\.)?invidious\.namazso\.eu',
407 r'(?:www\.)?invidious\.ethibox\.fr',
408 r'(?:www\.)?inv\.skyn3t\.in',
409 r'(?:www\.)?invidious\.himiko\.cloud',
410 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
411 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
412 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
413 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
414 # youtube-dl invidious instances list
415 r'(?:(?:www|no)\.)?invidiou\.sh',
416 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
417 r'(?:www\.)?invidious\.kabi\.tk',
418 r'(?:www\.)?invidious\.13ad\.de',
419 r'(?:www\.)?invidious\.mastodon\.host',
420 r'(?:www\.)?invidious\.zapashcanon\.fr',
421 r'(?:www\.)?invidious\.kavin\.rocks',
422 r'(?:www\.)?invidious\.tube',
423 r'(?:www\.)?invidiou\.site',
424 r'(?:www\.)?invidious\.site',
425 r'(?:www\.)?invidious\.xyz',
426 r'(?:www\.)?invidious\.nixnet\.xyz',
427 r'(?:www\.)?invidious\.drycat\.fr',
428 r'(?:www\.)?tube\.poal\.co',
429 r'(?:www\.)?tube\.connect\.cafe',
430 r'(?:www\.)?vid\.wxzm\.sx',
431 r'(?:www\.)?vid\.mint\.lgbt',
432 r'(?:www\.)?yewtu\.be',
433 r'(?:www\.)?yt\.elukerio\.org',
434 r'(?:www\.)?yt\.lelux\.fi',
435 r'(?:www\.)?invidious\.ggc-project\.de',
436 r'(?:www\.)?yt\.maisputain\.ovh',
437 r'(?:www\.)?invidious\.toot\.koeln',
438 r'(?:www\.)?invidious\.fdn\.fr',
439 r'(?:www\.)?watch\.nettohikari\.com',
440 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
441 r'(?:www\.)?qklhadlycap4cnod\.onion',
442 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
443 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
444 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
445 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
446 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
447 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
448 )
cb7dfeea 449 _VALID_URL = r"""(?x)^
c5e8d7af 450 (
edb53e2d 451 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 452 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
453 (?:www\.)?deturl\.com/www\.youtube\.com|
454 (?:www\.)?pwnyoutube\.com|
455 (?:www\.)?hooktube\.com|
456 (?:www\.)?yourepeat\.com|
457 tube\.majestyc\.net|
458 %(invidious)s|
459 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
460 (?:.*?\#/)? # handle anchor (#/) redirect urls
461 (?: # the various things that can precede the ID:
ac7553d0 462 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 463 |(?: # or the v= param in all its forms
f7000f3a 464 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 465 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 466 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
467 v=
468 )
f4b05232 469 ))
cbaed4bb
S
470 |(?:
471 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
472 vid\.plus| # or vid.plus/xxxx
473 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 474 %(invidious)s
cbaed4bb 475 )/
edb53e2d 476 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 477 )
c5e8d7af 478 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 479 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
480 (?!.*?\blist=
481 (?:
482 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
483 WL # WL are handled by the watch later IE
484 )
485 )
c5e8d7af 486 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 487 $""" % {
488 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
489 'invidious': '|'.join(_INVIDIOUS_SITES),
490 }
e40c758c 491 _PLAYER_INFO_RE = (
cc2db878 492 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
493 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 494 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 495 )
2c62dc26 496 _formats = {
c2d3cb4c 497 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
498 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
499 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
500 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
501 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
502 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
503 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
504 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 505 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 506 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
507 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
508 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
509 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
510 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
511 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 512 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 513 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
514 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 515
516
517 # 3D videos
c2d3cb4c 518 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
519 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
520 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
521 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 522 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
523 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
524 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 525
96fb5605 526 # Apple HTTP Live Streaming
11f12195 527 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 528 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
529 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
530 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
531 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
532 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 533 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
534 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
535
536 # DASH mp4 video
d23028a8
S
537 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
538 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
539 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
540 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
541 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 542 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
543 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
544 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
545 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
546 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
547 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
548 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 549
f6f1fc92 550 # Dash mp4 audio
d23028a8
S
551 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
552 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
553 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
554 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
555 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
556 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
557 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
558
559 # Dash webm
d23028a8
S
560 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
561 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
562 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
563 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
564 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
565 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
566 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
567 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
568 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
569 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
570 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
571 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
572 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
573 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
574 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 575 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
576 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
577 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
578 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
579 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
580 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
581 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
582
583 # Dash webm audio
d23028a8
S
584 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
585 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 586
0857baad 587 # Dash webm audio with opus inside
d23028a8
S
588 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
589 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
590 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 591
ce6b9a2d
PH
592 # RTMP (unnamed)
593 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
594
595 # av01 video only formats sometimes served with "unknown" codecs
596 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
597 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
598 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
599 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 600 }
29f7c58a 601 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 602
fd5c4aab
S
603 _GEO_BYPASS = False
604
78caa52a 605 IE_NAME = 'youtube'
2eb88d95
PH
606 _TESTS = [
607 {
2d3d2997 608 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
609 'info_dict': {
610 'id': 'BaW_jenozKc',
611 'ext': 'mp4',
3867038a 612 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
613 'uploader': 'Philipp Hagemeister',
614 'uploader_id': 'phihag',
ec85ded8 615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
616 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
617 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 618 'upload_date': '20121002',
3867038a 619 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 620 'categories': ['Science & Technology'],
3867038a 621 'tags': ['youtube-dl'],
556dbe7f 622 'duration': 10,
dbdaaa23 623 'view_count': int,
3e7c1224
PH
624 'like_count': int,
625 'dislike_count': int,
7c80519c 626 'start_time': 1,
297a564b 627 'end_time': 9,
2eb88d95 628 }
0e853ca4 629 },
fccd3771 630 {
4bc3a23e
PH
631 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
632 'note': 'Embed-only video (#1746)',
633 'info_dict': {
634 'id': 'yZIXLfi8CZQ',
635 'ext': 'mp4',
636 'upload_date': '20120608',
637 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
638 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
639 'uploader': 'SET India',
94bfcd23 640 'uploader_id': 'setindia',
ec85ded8 641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 642 'age_limit': 18,
545cc85d 643 },
644 'skip': 'Private video',
fccd3771 645 },
11b56058 646 {
8bdd16b4 647 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
648 'note': 'Use the first video ID in the URL',
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 656 'upload_date': '20121002',
3867038a 657 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 658 'categories': ['Science & Technology'],
3867038a 659 'tags': ['youtube-dl'],
556dbe7f 660 'duration': 10,
dbdaaa23 661 'view_count': int,
11b56058
PM
662 'like_count': int,
663 'dislike_count': int,
34a7de29
S
664 },
665 'params': {
666 'skip_download': True,
667 },
11b56058 668 },
dd27fd17 669 {
2d3d2997 670 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
671 'note': '256k DASH audio (format 141) via DASH manifest',
672 'info_dict': {
673 'id': 'a9LDPn-MO4I',
674 'ext': 'm4a',
675 'upload_date': '20121002',
676 'uploader_id': '8KVIDEO',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
678 'description': '',
679 'uploader': '8KVIDEO',
680 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 681 },
4bc3a23e
PH
682 'params': {
683 'youtube_include_dash_manifest': True,
684 'format': '141',
4919603f 685 },
de3c7fe0 686 'skip': 'format 141 not served anymore',
dd27fd17 687 },
8bdd16b4 688 # DASH manifest with encrypted signature
689 {
690 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
691 'info_dict': {
692 'id': 'IB3lcPjvWLA',
693 'ext': 'm4a',
694 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
695 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
696 'duration': 244,
697 'uploader': 'AfrojackVEVO',
698 'uploader_id': 'AfrojackVEVO',
699 'upload_date': '20131011',
cc2db878 700 'abr': 129.495,
8bdd16b4 701 },
702 'params': {
703 'youtube_include_dash_manifest': True,
704 'format': '141/bestaudio[ext=m4a]',
705 },
706 },
aa79ac0c
PH
707 # Controversy video
708 {
709 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
710 'info_dict': {
711 'id': 'T4XJQO3qol8',
712 'ext': 'mp4',
556dbe7f 713 'duration': 219,
aa79ac0c 714 'upload_date': '20100909',
4fe54c12 715 'uploader': 'Amazing Atheist',
aa79ac0c 716 'uploader_id': 'TheAmazingAtheist',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 718 'title': 'Burning Everyone\'s Koran',
545cc85d 719 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 720 }
c522adb1 721 },
dd2d55f1 722 # Normal age-gate video (embed allowed)
c522adb1 723 {
2d3d2997 724 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
725 'info_dict': {
726 'id': 'HtVdAasjOgU',
727 'ext': 'mp4',
728 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 729 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 730 'duration': 142,
c522adb1
JMF
731 'uploader': 'The Witcher',
732 'uploader_id': 'WitcherGame',
ec85ded8 733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 734 'upload_date': '20140605',
34952f09 735 'age_limit': 18,
c522adb1
JMF
736 },
737 },
8bdd16b4 738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
739 # YouTube Red ad is not captured for creator
740 {
741 'url': '__2ABJjxzNo',
742 'info_dict': {
743 'id': '__2ABJjxzNo',
744 'ext': 'mp4',
745 'duration': 266,
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 749 'creator': 'deadmau5',
750 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 753 'alt_title': 'Some Chords',
8bdd16b4 754 },
755 'expected_warnings': [
756 'DASH manifest missing',
757 ]
758 },
067aa17e 759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
760 {
761 'url': 'lqQg6PlCWgI',
762 'info_dict': {
763 'id': 'lqQg6PlCWgI',
764 'ext': 'mp4',
556dbe7f 765 'duration': 6085,
90227264 766 'upload_date': '20150827',
cbe2bd91 767 'uploader_id': 'olympic',
ec85ded8 768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 770 'uploader': 'Olympic',
cbe2bd91
PH
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
772 },
773 'params': {
774 'skip_download': 'requires avconv',
e52a40ab 775 }
cbe2bd91 776 },
6271f1ca
PH
777 # Non-square pixels
778 {
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
780 'info_dict': {
781 'id': '_b-2C3KPAM0',
782 'ext': 'mp4',
783 'stretched_ratio': 16 / 9.,
556dbe7f 784 'duration': 85,
6271f1ca
PH
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
ec85ded8 787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 789 'uploader': '孫ᄋᄅ',
6271f1ca
PH
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
791 },
06b491eb
S
792 },
793 # url_encoded_fmt_stream_map is empty string
794 {
795 'url': 'qEJwOuvDf7I',
796 'info_dict': {
797 'id': 'qEJwOuvDf7I',
f57b7835 798 'ext': 'webm',
06b491eb
S
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
800 'description': '',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
804 },
805 'params': {
806 'skip_download': 'requires avconv',
e323cf3f
S
807 },
808 'skip': 'This live event has ended.',
06b491eb 809 },
067aa17e 810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
811 {
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
813 'info_dict': {
814 'id': 'FIl7x6_3R5Y',
eb6793ba 815 'ext': 'webm',
da77d856
S
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 818 'duration': 220,
da77d856
S
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
ec85ded8 821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 822 'uploader': 'dorappi2000',
eb6793ba 823 'formats': 'mincount:31',
da77d856 824 },
eb6793ba 825 'skip': 'not actual anymore',
2ee8f5d8 826 },
8a1a26ce
YCH
827 # DASH manifest with segment_list
828 {
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
831 'info_dict': {
832 'id': 'CsmdDsKjzN8',
833 'ext': 'mp4',
17ee98e1 834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
839 },
840 'params': {
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
be49068d
S
843 },
844 'skip': 'This live event has ended.',
2ee8f5d8 845 },
cf7e015f
S
846 {
847 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 848 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 849 'info_dict': {
545cc85d 850 'id': 'jvGDaLqkpTg',
851 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
852 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
853 },
854 'playlist': [{
855 'info_dict': {
545cc85d 856 'id': 'jvGDaLqkpTg',
cf7e015f 857 'ext': 'mp4',
545cc85d 858 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
859 'description': 'md5:e03b909557865076822aa169218d6a5d',
860 'duration': 10643,
861 'upload_date': '20161111',
862 'uploader': 'Team PGP',
863 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
865 },
866 }, {
867 'info_dict': {
545cc85d 868 'id': '3AKt1R1aDnw',
cf7e015f 869 'ext': 'mp4',
545cc85d 870 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
871 'description': 'md5:e03b909557865076822aa169218d6a5d',
872 'duration': 10991,
873 'upload_date': '20161111',
874 'uploader': 'Team PGP',
875 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
876 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
877 },
878 }, {
879 'info_dict': {
545cc85d 880 'id': 'RtAMM00gpVc',
cf7e015f 881 'ext': 'mp4',
545cc85d 882 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
883 'description': 'md5:e03b909557865076822aa169218d6a5d',
884 'duration': 10995,
885 'upload_date': '20161111',
886 'uploader': 'Team PGP',
887 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
888 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
889 },
890 }, {
891 'info_dict': {
545cc85d 892 'id': '6N2fdlP3C5U',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10990,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }],
903 'params': {
904 'skip_download': True,
905 },
cbaed4bb 906 },
f9f49d87 907 {
067aa17e 908 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
909 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
910 'info_dict': {
911 'id': 'gVfLd0zydlo',
912 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
913 },
914 'playlist_count': 2,
be49068d 915 'skip': 'Not multifeed anymore',
f9f49d87 916 },
cbaed4bb 917 {
2d3d2997 918 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 919 'only_matching': True,
0e49d9a6 920 },
6d4fc66b 921 {
2d3d2997 922 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
923 'only_matching': True,
924 },
0e49d9a6 925 {
067aa17e 926 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 927 # Also tests cut-off URL expansion in video description (see
067aa17e
S
928 # https://github.com/ytdl-org/youtube-dl/issues/1892,
929 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
930 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
931 'info_dict': {
932 'id': 'lsguqyKfVQg',
933 'ext': 'mp4',
934 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 935 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 936 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 937 'duration': 133,
0e49d9a6
LL
938 'upload_date': '20151119',
939 'uploader_id': 'IronSoulElf',
ec85ded8 940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 941 'uploader': 'IronSoulElf',
eb6793ba
S
942 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
943 'track': 'Dark Walk - Position Music',
944 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 945 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
946 },
947 'params': {
948 'skip_download': True,
949 },
950 },
61f92af1 951 {
067aa17e 952 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
953 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
954 'only_matching': True,
955 },
313dfc45
LL
956 {
957 # Video with yt:stretch=17:0
958 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
959 'info_dict': {
960 'id': 'Q39EVAstoRM',
961 'ext': 'mp4',
962 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
963 'description': 'md5:ee18a25c350637c8faff806845bddee9',
964 'upload_date': '20151107',
965 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
966 'uploader': 'CH GAMER DROID',
967 },
968 'params': {
969 'skip_download': True,
970 },
be49068d 971 'skip': 'This video does not exist.',
313dfc45 972 },
7caf9830
S
973 {
974 # Video licensed under Creative Commons
975 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
976 'info_dict': {
977 'id': 'M4gD1WSo5mA',
978 'ext': 'mp4',
979 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
980 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 981 'duration': 721,
7caf9830
S
982 'upload_date': '20150127',
983 'uploader_id': 'BerkmanCenter',
ec85ded8 984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 985 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
986 'license': 'Creative Commons Attribution license (reuse allowed)',
987 },
988 'params': {
989 'skip_download': True,
990 },
991 },
fd050249
S
992 {
993 # Channel-like uploader_url
994 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
995 'info_dict': {
996 'id': 'eQcmzGIKrzg',
997 'ext': 'mp4',
998 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 999 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1000 'duration': 4060,
fd050249 1001 'upload_date': '20151119',
eb6793ba 1002 'uploader': 'Bernie Sanders',
fd050249 1003 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1005 'license': 'Creative Commons Attribution license (reuse allowed)',
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
1010 },
040ac686
S
1011 {
1012 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1013 'only_matching': True,
7f29cf54
S
1014 },
1015 {
067aa17e 1016 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1017 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1018 'only_matching': True,
6496ccb4
S
1019 },
1020 {
1021 # Rental video preview
1022 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1023 'info_dict': {
1024 'id': 'uGpuVWrhIzE',
1025 'ext': 'mp4',
1026 'title': 'Piku - Trailer',
1027 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1028 'upload_date': '20150811',
1029 'uploader': 'FlixMatrix',
1030 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1032 'license': 'Standard YouTube License',
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
eb6793ba 1037 'skip': 'This video is not available.',
022a5d66 1038 },
12afdc2a
S
1039 {
1040 # YouTube Red video with episode data
1041 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1042 'info_dict': {
1043 'id': 'iqKdEhx-dD4',
1044 'ext': 'mp4',
1045 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1046 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1047 'duration': 2085,
12afdc2a
S
1048 'upload_date': '20170118',
1049 'uploader': 'Vsauce',
1050 'uploader_id': 'Vsauce',
1051 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1052 'series': 'Mind Field',
1053 'season_number': 1,
1054 'episode_number': 1,
1055 },
1056 'params': {
1057 'skip_download': True,
1058 },
1059 'expected_warnings': [
1060 'Skipping DASH manifest',
1061 ],
1062 },
c7121fa7
S
1063 {
1064 # The following content has been identified by the YouTube community
1065 # as inappropriate or offensive to some audiences.
1066 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1067 'info_dict': {
1068 'id': '6SJNVb0GnPI',
1069 'ext': 'mp4',
1070 'title': 'Race Differences in Intelligence',
1071 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1072 'duration': 965,
1073 'upload_date': '20140124',
1074 'uploader': 'New Century Foundation',
1075 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1077 },
1078 'params': {
1079 'skip_download': True,
1080 },
545cc85d 1081 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1082 },
022a5d66
S
1083 {
1084 # itag 212
1085 'url': '1t24XAntNCY',
1086 'only_matching': True,
fd5c4aab
S
1087 },
1088 {
1089 # geo restricted to JP
1090 'url': 'sJL6WA-aGkQ',
1091 'only_matching': True,
1092 },
cd5a74a2
S
1093 {
1094 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1095 'only_matching': True,
1096 },
bc2ca1bb 1097 {
1098 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1099 'only_matching': True,
1100 },
1101 {
1102 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1103 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1104 'only_matching': True,
1105 },
825cd268
RA
1106 {
1107 # DRM protected
1108 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1109 'only_matching': True,
4fe54c12
S
1110 },
1111 {
1112 # Video with unsupported adaptive stream type formats
1113 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1114 'info_dict': {
1115 'id': 'Z4Vy8R84T1U',
1116 'ext': 'mp4',
1117 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1118 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1119 'duration': 433,
1120 'upload_date': '20130923',
1121 'uploader': 'Amelia Putri Harwita',
1122 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1124 'formats': 'maxcount:10',
1125 },
1126 'params': {
1127 'skip_download': True,
1128 'youtube_include_dash_manifest': False,
1129 },
5429d6a9 1130 'skip': 'not actual anymore',
5caabd3c 1131 },
1132 {
822b9d9c 1133 # Youtube Music Auto-generated description
5caabd3c 1134 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1135 'info_dict': {
1136 'id': 'MgNrAu2pzNs',
1137 'ext': 'mp4',
1138 'title': 'Voyeur Girl',
1139 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1140 'upload_date': '20190312',
5429d6a9
S
1141 'uploader': 'Stephen - Topic',
1142 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1143 'artist': 'Stephen',
1144 'track': 'Voyeur Girl',
1145 'album': 'it\'s too much love to know my dear',
1146 'release_date': '20190313',
1147 'release_year': 2019,
1148 },
1149 'params': {
1150 'skip_download': True,
1151 },
1152 },
66b48727
RA
1153 {
1154 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1155 'only_matching': True,
1156 },
011e75e6
S
1157 {
1158 # invalid -> valid video id redirection
1159 'url': 'DJztXj2GPfl',
1160 'info_dict': {
1161 'id': 'DJztXj2GPfk',
1162 'ext': 'mp4',
1163 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1164 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1165 'upload_date': '20090125',
1166 'uploader': 'Prochorowka',
1167 'uploader_id': 'Prochorowka',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1169 'artist': 'Panjabi MC',
1170 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1171 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1172 },
1173 'params': {
1174 'skip_download': True,
1175 },
545cc85d 1176 'skip': 'Video unavailable',
ea74e00b
DP
1177 },
1178 {
1179 # empty description results in an empty string
1180 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1181 'info_dict': {
1182 'id': 'x41yOUIvK2k',
1183 'ext': 'mp4',
1184 'title': 'IMG 3456',
1185 'description': '',
1186 'upload_date': '20170613',
1187 'uploader_id': 'ElevageOrVert',
1188 'uploader': 'ElevageOrVert',
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
a0566bbf 1194 {
29f7c58a 1195 # with '};' inside yt initial data (see [1])
1196 # see [2] for an example with '};' inside ytInitialPlayerResponse
1197 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1198 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1199 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1200 'info_dict': {
1201 'id': 'CHqg6qOn4no',
1202 'ext': 'mp4',
1203 'title': 'Part 77 Sort a list of simple types in c#',
1204 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1205 'upload_date': '20130831',
1206 'uploader_id': 'kudvenkat',
1207 'uploader': 'kudvenkat',
1208 },
1209 'params': {
1210 'skip_download': True,
1211 },
1212 },
29f7c58a 1213 {
1214 # another example of '};' in ytInitialData
1215 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1216 'only_matching': True,
1217 },
1218 {
1219 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1220 'only_matching': True,
1221 },
545cc85d 1222 {
cc2db878 1223 # https://github.com/ytdl-org/youtube-dl/pull/28094
1224 'url': 'OtqTfy26tG0',
1225 'info_dict': {
1226 'id': 'OtqTfy26tG0',
1227 'ext': 'mp4',
1228 'title': 'Burn Out',
1229 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1230 'upload_date': '20141120',
1231 'uploader': 'The Cinematic Orchestra - Topic',
1232 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1234 'artist': 'The Cinematic Orchestra',
1235 'track': 'Burn Out',
1236 'album': 'Every Day',
1237 'release_data': None,
1238 'release_year': None,
1239 },
1240 'params': {
1241 'skip_download': True,
1242 },
545cc85d 1243 },
bc2ca1bb 1244 {
1245 # controversial video, only works with bpctr when authenticated with cookies
1246 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1247 'only_matching': True,
1248 },
2eb88d95
PH
1249 ]
1250
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, filled in by _decrypt_signature.
    self._player_cache = {}
    # Downloaded player code, filled in by _extract_signature_function.
    self._code_cache = {}
e0df6211 1255
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string representation of a signature.

    The result is the length of each dot-separated part of
    ``example_sig``, joined with dots.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1259
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first player pattern matching the URL.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order.

    Raises:
        ExtractorError: if none of the patterns matches ``player_url``.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1269
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature string.

    A cached list of character indices is looked up in the downloader's
    'youtube-sigfuncs' cache first. On a miss the player code is downloaded
    (kept per player id in ``self._code_cache``), parsed with
    ``_parse_sig_js``, and the index list derived from it is stored in the
    cache before the parsed function is returned.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cached_spec = cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    try:
        code = self._code_cache[player_id]
    except KeyError:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(code)

    # Run the function over a string whose characters encode their own
    # position, so the output reveals which input index lands where.
    probe = ''.join(compat_chr(pos) for pos in range(len(example_sig)))
    new_spec = [ord(ch) for ch in res(probe)]

    cache.store('youtube-sigfuncs', func_id, new_spec)
    return res
1296
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is applied to a probe string whose characters encode their own
    index; the resulting index list is rendered as a sum of ``s[...]``
    index and slice expressions and shown via ``self.to_screen``.
    """
    def _slice_expr(first, last, stride):
        # Render an inclusive run first..last as a Python slice of ``s``.
        head = str(first) if first != 0 else ''
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (head, tail, stride_part)

    def _sig_expressions(indices):
        stride = None
        # Placeholder only - run_start is always assigned together with stride
        run_start = '(Never used)'
        for cur, prev in zip(indices[1:], indices[:-1]):
            delta = cur - prev
            if stride is not None:
                if delta != stride:
                    # The current run ended at ``prev``
                    yield _slice_expr(run_start, prev, stride)
                    stride = None
            elif delta in [-1, 1]:
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(_sig_expressions(index_spec))
    part_lengths = ', '.join(
        compat_str(len(part)) for part in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1335
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function in ``jscode``.

    The function name is located with the patterns below (tried in order),
    then extracted with ``JSInterpreter``. The returned callable takes the
    signature string and passes it as the single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    return lambda s: initial_function([s])
1359
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is obtained once per (player URL, signature
    shape) pair and kept in ``self._player_cache``.

    Raises:
        ExtractorError: if ``player_url`` is None, or if anything fails
            while obtaining or applying the signature function (the
            traceback is included in the message).
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and site-relative player URLs
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            func = self._player_cache[cache_key]
        else:
            func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = func
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(func, s)
        return func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1386
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in ``player_response``.

    Does nothing when the response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. The request is
    non-fatal: a failure only produces the 'Unable to mark watched' note.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick each character uniformly. The previous
    # ``random.randint(0, 256) & 63`` drew from 257 values (randint is
    # inclusive on both ends), which slightly over-weighted index 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1411
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in ``webpage``.

    Three sources are collected, in this order:
      * HTML-unescaped player URLs from iframe/embed/object/SWFObject
        style embeds,
      * HTML-unescaped ``data-youtube-id`` values of lazyYT elements,
      * ``data-video_id`` values of the Wordpress "YouTube Video Importer"
        plugin player.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to be ``embedSWF\(?:\s*``, which
    # requires a literal ':' and so never matched ``embedSWF("...")``.
    # The call form is matched now; the legacy form is kept so everything
    # that matched before still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(\s*|\(?:\s*)|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
1443
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1448
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched on ``url``.

    Raises:
        ExtractorError: if ``url`` does not match ``cls._VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1456
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the player-bar data in ``data``.

    Each chapter ends where the next one starts; the last one ends at
    ``duration``. Chapters whose start or end time cannot be determined
    are left out. Returns None when ``data`` has no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def _start_seconds(entry):
        # timeRangeStartMillis is in milliseconds; scale it to seconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    start_times = [_start_seconds(entry) for entry in chapters_list]
    # The end of chapter N is the start of chapter N + 1
    end_times = start_times[1:] + [duration]

    chapters = []
    for entry, begin, end in zip(chapters_list, start_times, end_times):
        if begin is None or end is None:
            continue
        chapters.append({
            'start_time': begin,
            'end_time': end,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1496
545cc85d 1497 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1498 return self._parse_json(self._search_regex(
1499 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1500 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1501
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty ``text`` strings of the dicts in ``runs``.

    Entries that are not dicts, or whose ``text`` is missing, empty or not
    a string, are ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1515
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is
    stored under 'parent', falling back to 'root' when it is falsy.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def _runs_text(getter):
        # Join the text runs selected by ``getter``; None if there are none
        return self._join_text_entries(try_get(comment_renderer, getter) or [])

    text = _runs_text(lambda x: x['contentText']['runs']) or ''
    time_text = _runs_text(lambda x: x['publishedTimeText']['runs'])

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'])
    votes = str_to_int(try_get(comment_renderer, vote_getters, compat_str)) or 0

    return {
        'id': comment_id,
        'text': text,
        # TODO: This should be parsed to timestamp
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': try_get(
            comment_renderer, lambda x: x['isLiked'], bool),
        'author': try_get(
            comment_renderer, lambda x: x['authorText']['simpleText'], compat_str),
        'author_id': try_get(
            comment_renderer,
            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str),
        'author_thumbnail': try_get(
            comment_renderer,
            lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
1549
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     session_token_list, parent=None, comment_counts=None):
    """Generate comments (and, recursively, their replies) page by page.

    root_continuation_data -- renderer from which the first continuation
                              is extracted
    identity_token         -- sent as 'x-youtube-identity-token' if truthy
    account_syncid         -- sent as 'X-Goog-PageId' if truthy
    session_token_list     -- one-element list holding the current session
                              token; updated in place whenever a response
                              carries a new 'xsrf_token', so that recursive
                              calls share it
    parent                 -- id of the comment whose replies are fetched,
                              or None for top-level comments
    comment_counts         -- shared list [comments so far, estimated
                              total, current thread number]; created on
                              the outermost call

    Yields comment dicts as produced by _extract_comment.  On the first
    top-level page the estimated total number of comments is additionally
    yielded as a bare int, so consumers must tell ints from dicts.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here.  It must not be bundled
            # into the getter tuple, where it would be *called* on the
            # source and return a copy of the wrong renderer.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]
    headers = self._DEFAULT_BASIC_API_HEADERS.copy()

    # TODO: Generalize the download code with TabIE
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        # Bound up front so the check after the retry loop is well defined
        # even if the loop body never runs (negative retry setting).
        response = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = "Downloading initial comment continuation page"
                    else:
                        note_prefix = "    Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = "%sDownloading comment%s page %d %s" % (
                        "       " if parent else "",
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning("Assumed end of comments (received HTTP Error 413)")
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + " (this API is probably deprecated)"
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # `browse` may be a list (see the x[1] getter above), so
                # the lookups below go through try_get instead of .get().

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if try_get(browse, lambda x: x['reload']):
                    raise ExtractorError("Invalid or missing params in continuation request", expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = try_get(browse, lambda x: x['externalErrorMessage'])
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    expected_total = str_to_int(expected_comment_count)
                    # Only announce/yield a total that actually parsed;
                    # a None here would break the '%d' progress strings.
                    if expected_total is not None:
                        comment_counts[1] = expected_total
                        self.to_screen("Downloading ~%d comments" % expected_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No recognised renderer in this response: `continuation` was
            # not advanced, so requesting again would fetch the very same
            # page forever.  Stop paging instead.
            break
1735
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks *contents* and, for each entry, feeds the first value stored
    under a known comment renderer key to _comment_entries.  Integers
    produced by that generator are taken as the estimated total; anything
    else is collected as a comment.

    Returns a dict with 'comments' (list) and 'comment_count' (its length).
    """
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    estimated_total = 0

    for entry in contents:
        for key, renderer in entry.items():
            if key in known_entry_comment_renderers:
                for item in self._comment_entries(
                        renderer,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg),
                        session_token_list=[xsrf_token]):
                    if isinstance(item, int):
                        # the generator reports its estimated total as a bare int
                        estimated_total = item
                    else:
                        collected.append(item)
                # only the first known renderer of an entry is processed
                break

    self.to_screen("Downloaded %d/%d comments" % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1765
def _real_extract(self, url):
    """Extract the info dict for a single watch URL.

    Steps, in order: fetch the watch page and the player response (with
    an API fallback and a get_video_info refetch for age-gated videos),
    short-circuit to a trailer or a multifeed playlist where applicable,
    build the format list (direct/ciphered URLs, HLS, DASH), raise a
    descriptive ExtractorError when nothing is playable, then collect
    metadata, subtitles, chapters, like counts, availability and - on
    request - annotations and a deferred comment extractor.

    Returns an info dict, or a url/playlist result for trailers and
    multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') == 'Sign in to confirm your age':
        pr = self._parse_json(try_get(compat_parse_qs(
            self._download_webpage(
                base_url + 'get_video_info', video_id,
                'Refetching age-gated info webpage',
                'unable to download video info webpage', query={
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                }, fatal=False)),
            lambda x: x['player_response'][0],
            compat_str) or '{}', video_id)
        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text of a renderer text object: its 'simpleText', else the
        # concatenated 'runs'.  A missing 'runs' yields '' rather than
        # aborting the whole extraction with a KeyError.
        if not x:
            return
        return x.get('simpleText') or ''.join(
            [r['text'] for r in (x.get('runs') or [])])

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats = []
    itags = []
    itag_qualities = {}
    player_url = None
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        quality = fmt.get('quality')
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                if not webpage:
                    continue
                player_url = self._search_regex(
                    r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                    webpage, 'player URL', fatal=False)
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
        }
        mimetype = fmt.get('mimeType')
        if mimetype:
            mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
            if mobj:
                dct['ext'] = mimetype2ext(mobj.group(1))
                dct.update(parse_codecs(mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            itag = self._search_regex(
                r'/itag/(\d+)', f['url'], 'itag', default=None)
            if itag:
                f['format_id'] = itag
            formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                # skip formats already collected from streamingData
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    # Not actually useful since the sorting is already done with "quality,res,fps,codec"
                    # but kept to maintain feature parity (and code similarity) with youtube-dl
                    # Remove if this causes any issues with sorting in future
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # `reason` may be None when only a subreason is provided;
            # never concatenate onto None.
            if subreason:
                reason = '%s\n%s' % (reason, subreason) if reason else subreason
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            try:
                w, h = [int(v) for v in keyword.split('=')[1].split(':')]
            except ValueError:
                # malformed tag (wrong number of parts or non-numeric)
                continue
            if w > 0 and h > 0:
                # float() keeps this a true division on every interpreter
                ratio = float(w) / h
                for f in formats:
                    if f.get('vcodec') != 'none':
                        f['stretched_ratio'] = ratio

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # neither container had thumbnails: fall back to page metadata
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    owner_profile_url = microformat.get('ownerProfileUrl')

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # .get(): videoDetails may be empty, which must not be fatal
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, query):
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                })
            container[lang_code] = lang_subs

        for caption_track in (pctr.get('captionTracks') or []):
            # own name, so the function-level base_url is not clobbered
            caption_base_url = caption_track.get('baseUrl')
            if not caption_base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_base_url, lang_code, {})
                continue
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, caption_base_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # start/end offsets from the URL fragment or query; first hit wins
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text, not the group name literal
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    # Main column of the watch page.  Resolved once and always a list, so
    # the metadata loop, the badge scan and the deferred comment extractor
    # all see the same, always-bound value.
    watch_contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if not is_live:
        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            info['subtitles']['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            for engagment_pannel in (initial_data.get('engagementPanels') or []):
                markers = try_get(
                    engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not markers:
                    continue

                def chapter_time(mmlir):
                    return parse_duration(
                        get_text(mmlir.get('timeDescription')))

                chapters = []
                for next_num, content in enumerate(markers, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    # a chapter ends where the next one starts; the last
                    # one ends with the video.  `or {}` covers a next
                    # item without a marker renderer.
                    end_time = chapter_time(try_get(
                        markers, lambda x: x[next_num]['macroMarkersListItemRenderer']) or {}) \
                        if next_num < len(markers) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        for content in watch_contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # a divider line between rows is taken to mean several
                # songs are listed; song metadata is then left unset
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    mrr_contents_text = get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        for content in watch_contents:
            badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
            for badge in badges or []:
                label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
                if label.lower() == 'members only':
                    is_membersonly = True
                    break
                elif label.lower() == 'premium':
                    is_premium = True
                    break
            if is_membersonly or is_premium:
                break

    # TODO: Add this for playlists
    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self._downloader.params.get('writeannotations', False)
    get_comments = self._downloader.params.get('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Deferred: the comments are only downloaded when the stored
        # callable is invoked.
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, watch_contents, webpage, xsrf_token)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 2363
5f6a1245 2364
8bdd16b4 2365class YoutubeTabIE(YoutubeBaseInfoExtractor):
2366 IE_DESC = 'YouTube.com tab'
70d5c17b 2367 _VALID_URL = r'''(?x)
2368 https?://
2369 (?:\w+\.)?
2370 (?:
2371 youtube(?:kids)?\.com|
2372 invidio\.us
2373 )/
2374 (?:
2375 (?:channel|c|user)/|
2376 (?P<not_channel>
9ba5705a 2377 feed/|hashtag/|
70d5c17b 2378 (?:playlist|watch)\?.*?\blist=
2379 )|
29f7c58a 2380 (?!(?:%s)\b) # Direct URLs
70d5c17b 2381 )
2382 (?P<id>[^/?\#&]+)
2383 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2384 IE_NAME = 'youtube:tab'
2385
81127aa5 2386 _TESTS = [{
8bdd16b4 2387 # playlists, multipage
2388 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2389 'playlist_mincount': 94,
2390 'info_dict': {
2391 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2392 'title': 'Игорь Клейнер - Playlists',
2393 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2394 'uploader': 'Игорь Клейнер',
2395 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2396 },
2397 }, {
2398 # playlists, multipage, different order
2399 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2400 'playlist_mincount': 94,
2401 'info_dict': {
2402 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2403 'title': 'Игорь Клейнер - Playlists',
2404 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2405 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2406 'uploader': 'Игорь Клейнер',
8bdd16b4 2407 },
2408 }, {
2409 # playlists, singlepage
2410 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2411 'playlist_mincount': 4,
2412 'info_dict': {
2413 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2414 'title': 'ThirstForScience - Playlists',
2415 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2416 'uploader': 'ThirstForScience',
2417 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2418 }
2419 }, {
2420 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2421 'only_matching': True,
2422 }, {
2423 # basic, single video playlist
0e30a7b9 2424 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2425 'info_dict': {
0e30a7b9 2426 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2427 'uploader': 'Sergey M.',
2428 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2429 'title': 'youtube-dl public playlist',
81127aa5 2430 },
0e30a7b9 2431 'playlist_count': 1,
9291475f 2432 }, {
8bdd16b4 2433 # empty playlist
0e30a7b9 2434 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2435 'info_dict': {
0e30a7b9 2436 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2437 'uploader': 'Sergey M.',
2438 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2439 'title': 'youtube-dl empty playlist',
9291475f
PH
2440 },
2441 'playlist_count': 0,
2442 }, {
8bdd16b4 2443 # Home tab
2444 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2445 'info_dict': {
8bdd16b4 2446 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2447 'title': 'lex will - Home',
2448 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2449 'uploader': 'lex will',
2450 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2451 },
8bdd16b4 2452 'playlist_mincount': 2,
9291475f 2453 }, {
8bdd16b4 2454 # Videos tab
2455 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2456 'info_dict': {
8bdd16b4 2457 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2458 'title': 'lex will - Videos',
2459 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2460 'uploader': 'lex will',
2461 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2462 },
8bdd16b4 2463 'playlist_mincount': 975,
9291475f 2464 }, {
8bdd16b4 2465 # Videos tab, sorted by popular
2466 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2467 'info_dict': {
8bdd16b4 2468 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2469 'title': 'lex will - Videos',
2470 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2471 'uploader': 'lex will',
2472 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2473 },
8bdd16b4 2474 'playlist_mincount': 199,
9291475f 2475 }, {
8bdd16b4 2476 # Playlists tab
2477 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2478 'info_dict': {
8bdd16b4 2479 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2480 'title': 'lex will - Playlists',
2481 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2482 'uploader': 'lex will',
2483 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2484 },
8bdd16b4 2485 'playlist_mincount': 17,
ac7553d0 2486 }, {
8bdd16b4 2487 # Community tab
2488 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2489 'info_dict': {
8bdd16b4 2490 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2491 'title': 'lex will - Community',
2492 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2493 'uploader': 'lex will',
2494 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2495 },
2496 'playlist_mincount': 18,
87dadd45 2497 }, {
8bdd16b4 2498 # Channels tab
2499 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2500 'info_dict': {
8bdd16b4 2501 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2502 'title': 'lex will - Channels',
2503 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2504 'uploader': 'lex will',
2505 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2506 },
deaec5af 2507 'playlist_mincount': 12,
6b08cdf6 2508 }, {
a0566bbf 2509 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2510 'only_matching': True,
2511 }, {
a0566bbf 2512 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2513 'only_matching': True,
2514 }, {
a0566bbf 2515 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2516 'only_matching': True,
2517 }, {
2518 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2519 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2520 'info_dict': {
2521 'title': '29C3: Not my department',
2522 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2523 'uploader': 'Christiaan008',
2524 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2525 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2526 },
2527 'playlist_count': 96,
2528 }, {
2529 'note': 'Large playlist',
2530 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2531 'info_dict': {
8bdd16b4 2532 'title': 'Uploads from Cauchemar',
2533 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2534 'uploader': 'Cauchemar',
2535 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2536 },
8bdd16b4 2537 'playlist_mincount': 1123,
2538 }, {
2539 # even larger playlist, 8832 videos
2540 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2541 'only_matching': True,
4b7df0d3
JMF
2542 }, {
2543 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2544 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2545 'info_dict': {
acf757f4
PH
2546 'title': 'Uploads from Interstellar Movie',
2547 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2548 'uploader': 'Interstellar Movie',
8bdd16b4 2549 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2550 },
481cc733 2551 'playlist_mincount': 21,
8bdd16b4 2552 }, {
2553 # https://github.com/ytdl-org/youtube-dl/issues/21844
2554 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2555 'info_dict': {
2556 'title': 'Data Analysis with Dr Mike Pound',
2557 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2558 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2559 'uploader': 'Computerphile',
deaec5af 2560 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2561 },
2562 'playlist_mincount': 11,
2563 }, {
a0566bbf 2564 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2565 'only_matching': True,
dacb3a86
S
2566 }, {
2567 # Playlist URL that does not actually serve a playlist
2568 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2569 'info_dict': {
2570 'id': 'FqZTN594JQw',
2571 'ext': 'webm',
2572 'title': "Smiley's People 01 detective, Adventure Series, Action",
2573 'uploader': 'STREEM',
2574 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2576 'upload_date': '20150526',
2577 'license': 'Standard YouTube License',
2578 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2579 'categories': ['People & Blogs'],
2580 'tags': list,
dbdaaa23 2581 'view_count': int,
dacb3a86
S
2582 'like_count': int,
2583 'dislike_count': int,
2584 },
2585 'params': {
2586 'skip_download': True,
2587 },
13a75688 2588 'skip': 'This video is not available.',
dacb3a86 2589 'add_ie': [YoutubeIE.ie_key()],
481cc733 2590 }, {
8bdd16b4 2591 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2592 'only_matching': True,
66b48727 2593 }, {
8bdd16b4 2594 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2595 'only_matching': True,
a0566bbf 2596 }, {
2597 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2598 'info_dict': {
2599 'id': '9Auq9mYxFEE',
2600 'ext': 'mp4',
deaec5af 2601 'title': compat_str,
a0566bbf 2602 'uploader': 'Sky News',
2603 'uploader_id': 'skynews',
2604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2605 'upload_date': '20191102',
deaec5af 2606 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2607 'categories': ['News & Politics'],
2608 'tags': list,
2609 'like_count': int,
2610 'dislike_count': int,
2611 },
2612 'params': {
2613 'skip_download': True,
2614 },
2615 }, {
2616 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2617 'info_dict': {
2618 'id': 'a48o2S1cPoo',
2619 'ext': 'mp4',
2620 'title': 'The Young Turks - Live Main Show',
2621 'uploader': 'The Young Turks',
2622 'uploader_id': 'TheYoungTurks',
2623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2624 'upload_date': '20150715',
2625 'license': 'Standard YouTube License',
2626 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2627 'categories': ['News & Politics'],
2628 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2629 'like_count': int,
2630 'dislike_count': int,
2631 },
2632 'params': {
2633 'skip_download': True,
2634 },
2635 'only_matching': True,
2636 }, {
2637 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2638 'only_matching': True,
2639 }, {
2640 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2641 'only_matching': True,
3d3dddc9 2642 }, {
2643 'url': 'https://www.youtube.com/feed/trending',
2644 'only_matching': True,
2645 }, {
2646 # needs auth
2647 'url': 'https://www.youtube.com/feed/library',
2648 'only_matching': True,
2649 }, {
2650 # needs auth
2651 'url': 'https://www.youtube.com/feed/history',
2652 'only_matching': True,
2653 }, {
2654 # needs auth
2655 'url': 'https://www.youtube.com/feed/subscriptions',
2656 'only_matching': True,
2657 }, {
2658 # needs auth
2659 'url': 'https://www.youtube.com/feed/watch_later',
2660 'only_matching': True,
2661 }, {
2662 # no longer available?
2663 'url': 'https://www.youtube.com/feed/recommended',
2664 'only_matching': True,
29f7c58a 2665 }, {
2666 # inline playlist with not always working continuations
2667 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2668 'only_matching': True,
2669 }, {
2670 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2671 'only_matching': True,
2672 }, {
2673 'url': 'https://www.youtube.com/course',
2674 'only_matching': True,
2675 }, {
2676 'url': 'https://www.youtube.com/zsecurity',
2677 'only_matching': True,
2678 }, {
2679 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2680 'only_matching': True,
2681 }, {
2682 'url': 'https://www.youtube.com/TheYoungTurks/live',
2683 'only_matching': True,
2684 }]
2685
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that YoutubeIE declares suitable is rejected here; everything
    else is decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2690
def _extract_channel_id(self, webpage):
    """Return the channel id found in ``webpage``.

    The ``channelId`` meta tag is tried first.  If it is absent, a channel
    URL is read from one of several Open Graph / app-link / Twitter meta
    tags and the id is taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the capturing group: a repeated
    # group ``(...)+`` only retains its final iteration, i.e. one character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2703
8bdd16b4 2704 @staticmethod
cd7c66cf 2705 def _extract_basic_item_renderer(item):
2706 # Modified from _extract_grid_item_renderer
2707 known_renderers = (
e3c07697 2708 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2709 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2710 )
2711 for key, renderer in item.items():
2712 if key not in known_renderers:
2713 continue
2714 return renderer
8bdd16b4 2715
def _grid_entries(self, grid_renderer):
    """Yield results for every usable item in ``grid_renderer['items']``.

    For each item the basic renderer is looked up; depending on which of
    ``playlistId``, ``videoId`` and ``channelId`` it carries, a playlist
    URL result, an extracted video and/or a channel URL result is yielded
    (in that order).
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        basic = self._extract_basic_item_renderer(grid_item)
        if not isinstance(basic, dict):
            continue

        # playlist
        list_id = basic.get('playlistId')
        if list_id:
            runs_title = try_get(
                basic, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=runs_title)

        # video
        if basic.get('videoId'):
            yield self._extract_video(basic)

        # channel (its title lives under 'simpleText' rather than 'runs')
        chan_id = basic.get('channelId')
        if chan_id:
            simple_title = try_get(
                basic, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % chan_id,
                ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2744
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield entries found directly inside a shelf's ``content`` dict.

    Only ``gridRenderer`` / ``expandedShelfContentsRenderer`` contents are
    processed; nothing is yielded for any other content type.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = (
        shelf_content.get('gridRenderer')
        or shelf_content.get('expandedShelfContentsRenderer'))
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for entry in self._grid_entries(grid):
            yield entry
    # TODO: 'horizontalListRenderer' content is not handled yet
8bdd16b4 2760
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first (unless ``skip_channels`` is set and the URL points at a
    ``/channels?`` listing, in which case nothing is yielded at all).
    Entries extracted from the shelf's own content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2778
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each playlist item that has a video id.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are
    accepted.
    """
    for element in video_list_renderer['contents']:
        if not isinstance(element, dict):
            continue
        video = (
            element.get('playlistVideoRenderer')
            or element.get('playlistPanelVideoRenderer'))
        if isinstance(video, dict) and video.get('videoId'):
            yield self._extract_video(video)
07aeced6 2790
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
2798
def _video_entry(self, video_renderer):
    """Return the extracted video for ``video_renderer``, or None without a ``videoId``."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 2803
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The post's attached video (if any) is yielded first, followed by a URL
    result for every link in the post text that YoutubeIE can handle.  A
    link pointing at the attached video is skipped so it is not yielded
    twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment's id: the duplicate check below compares
    # inline links against it (it used to stay None, so nothing was skipped).
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the id parsed from the link itself, not the attachment's id.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2832
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield post entries from a community-tab continuation.

    Every ``backstagePostThreadRenderer`` found in the continuation's
    ``contents`` list is handed to ``_post_thread_entries``.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Elements come from remote JSON; skip anything that is not a dict
        # instead of failing on ``.get``.
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 2843
29f7c58a 2844 @staticmethod
2845 def _build_continuation_query(continuation, ctp=None):
2846 query = {
2847 'ctoken': continuation,
2848 'continuation': continuation,
2849 }
2850 if ctp:
2851 query['itct'] = ctp
2852 return query
2853
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    Reads ``renderer['continuations'][0]['nextContinuationData']``; returns
    None when that structure or its ``continuation`` token is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 2865
@classmethod
def _extract_continuation(cls, renderer):
    """Return a continuation query for ``renderer``, or None.

    ``nextContinuationData`` is preferred.  Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned for the first
    ``continuationItemRenderer`` that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2888
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield all entries of ``tab``, following continuation pages.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first.  While a continuation is
    available, further pages are requested through ``_call_api`` (endpoint
    ``browse``) and their entries yielded as well.  Each page request is
    retried up to ``extractor_retries`` times on HTTP 500/503/404 errors
    and on responses that contain none of the expected top-level keys.

    ``identity_token`` and ``account_syncid`` are sent as request headers
    when truthy; ``item_id`` is only used to label the page requests.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Handlers for the renderers that may appear inside an itemSectionRenderer
        known_renderers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Only the first recognised renderer of each element is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes
    # the next continuation into (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    # Handlers for responses that carry a 'continuationContents' dict
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses that carry 'appendContinuationItemsAction':
    # (handler, key under which the handler expects the list of items)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break

        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation came with click
        # tracking params, so it must not be indexed unconditionally.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell: extract_entries may store the next continuation here
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3043
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in ``tabs`` is selected.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 3051
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of ``data``.

    Returns a dict with whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be found; keys whose value is None are dropped.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
        })
    return {key: value for key, value in found.items() if value is not None}
8bdd16b4 3074
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel or playlist).

    Metadata is read from ``channelMetadataRenderer`` or, failing that,
    ``playlistMetadataRenderer``.  The selected tab's title is appended to
    the playlist title, and the entries are produced lazily by
    ``_entries`` for the selected tab.
    """
    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)

    channel_meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_meta:
        channel_name = channel_meta.get('title')
        channel_url = channel_meta.get('channelUrl')
        channel_id = channel_meta.get('externalId')

    # Fall back to the playlist metadata only when there is no channel metadata
    meta = channel_meta or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        thumbnails_list = (
            try_get(meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: try the uploader shown in the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3144
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a Mix playlist, page by page.

    Each page is the playlist panel of a watch page.  After a page is
    consumed, the watch page of its last video is downloaded to obtain the
    next page.  Iteration stops when a page has no videos, contains nothing
    after the previously seen last video, no playlist panel can be found,
    or the very first video shows up again.
    """
    first_id = last_id = None
    for page_num in itertools.count(1):
        # The re-fetched playlist below is None when the watch page has no
        # playlist panel; treat that as the end of the Mix rather than
        # failing inside _playlist_entries.
        if not isinstance(playlist, dict):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page
        # (start == 0 when it is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']

        _, data = self._extract_webpage(
            'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
            '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3167
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Build the result for a playlist embedded in a watch page.

    If the playlist has its own endpoint URL that differs from ``url``, a
    URL result pointing at it is returned (handled by YoutubeTabIE).
    Otherwise the playlist is extracted as a Mix.
    """
    list_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    list_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    target_url = urljoin(url, endpoint_path)
    if not target_url or target_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, list_id),
            playlist_id=list_id, playlist_title=list_title)

    return self.url_result(
        target_url, ie=YoutubeTabIE.ie_key(), video_id=list_id,
        video_title=list_title)
c5e8d7af 3185
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in `data['alerts']`.

    Alerts whose type is not 'error' (case-insensitive) are reported as
    warnings.  Of the error alerts, every message but the last is reported
    as a warning and the last one is raised as an ExtractorError, whose
    `expected` flag is taken from the `expected` argument.
    """

    def _real_extract_alerts():
        # Yields (alert_type, message) pairs; anything that does not have
        # the anticipated shape is skipped silently
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # The values come from untrusted JSON as well; a non-dict
                # value would otherwise fail on .get() below
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if message:
                    yield alert_type, message
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    message = try_get(run, lambda x: x['text'], compat_str)
                    if message:
                        yield alert_type, message

    err_msg = None
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            # Only the last error is raised; earlier ones become warnings
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
02ced43c 3215
cd7c66cf 3216 def _extract_webpage(self, url, item_id):
62bff2c1 3217 retries = self._downloader.params.get('extractor_retries', 3)
3218 count = -1
c705177d 3219 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3220 while count < retries:
62bff2c1 3221 count += 1
14fdfea9 3222 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3223 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3224 if count:
c705177d 3225 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3226 webpage = self._download_webpage(
3227 url, item_id,
cd7c66cf 3228 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3229 data = self._extract_yt_initial_data(item_id, webpage)
f3eaa8dd 3230 self._extract_alerts(data, expected=True)
14fdfea9 3231 if data.get('contents') or data.get('currentVideoEndpoint'):
3232 break
c705177d 3233 if count >= retries:
3234 self._downloader.report_error(last_error)
cd7c66cf 3235 return webpage, data
3236
def _real_extract(self, url):
    """Resolve a tab, playlist or watch URL to the matching result.

    The host is rewritten to www.youtube.com first.  A URL carrying both a
    video and a playlist ID yields only the video when the `noplaylist`
    parameter is set.  After downloading the page, the result is built from
    its tabs, else from its watch-page playlist, else the single video is
    delegated to YoutubeIE.  Raises ExtractorError when none of these can
    be determined.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = match.groupdict() if match else {}
    not_channel = groups.get('not_channel') or ''
    if groups and not not_channel:
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and not_channel.startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Prefer the video ID reported by the page over the one from the URL
    page_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = page_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3290
c5e8d7af 3291
8bdd16b4 3292class YoutubePlaylistIE(InfoExtractor):
3293 IE_DESC = 'YouTube.com playlists'
3294 _VALID_URL = r'''(?x)(?:
3295 (?:https?://)?
3296 (?:\w+\.)?
3297 (?:
3298 (?:
3299 youtube(?:kids)?\.com|
29f7c58a 3300 invidio\.us
8bdd16b4 3301 )
3302 /.*?\?.*?\blist=
3303 )?
3304 (?P<id>%(playlist_id)s)
3305 )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
3306 IE_NAME = 'youtube:playlist'
cdc628a4 3307 _TESTS = [{
8bdd16b4 3308 'note': 'issue #673',
3309 'url': 'PLBB231211A4F62143',
cdc628a4 3310 'info_dict': {
8bdd16b4 3311 'title': '[OLD]Team Fortress 2 (Class-based LP)',
3312 'id': 'PLBB231211A4F62143',
3313 'uploader': 'Wickydoo',
3314 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
3315 },
3316 'playlist_mincount': 29,
3317 }, {
3318 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3319 'info_dict': {
3320 'title': 'YDL_safe_search',
3321 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3322 },
3323 'playlist_count': 2,
3324 'skip': 'This playlist is private',
9558dcec 3325 }, {
8bdd16b4 3326 'note': 'embedded',
3327 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3328 'playlist_count': 4,
9558dcec 3329 'info_dict': {
8bdd16b4 3330 'title': 'JODA15',
3331 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3332 'uploader': 'milan',
3333 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
9558dcec 3334 }
cdc628a4 3335 }, {
8bdd16b4 3336 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3337 'playlist_mincount': 982,
3338 'info_dict': {
3339 'title': '2018 Chinese New Singles (11/6 updated)',
3340 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3341 'uploader': 'LBK',
3342 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
3343 }
daa0df9e 3344 }, {
29f7c58a 3345 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
3346 'only_matching': True,
3347 }, {
3348 # music album playlist
3349 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
3350 'only_matching': True,
3351 }]
3352
3353 @classmethod
3354 def suitable(cls, url):
3355 return False if YoutubeTabIE.suitable(url) else super(
3356 YoutubePlaylistIE, cls).suitable(url)
3357
3358 def _real_extract(self, url):
3359 playlist_id = self._match_id(url)
3360 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
3361 if not qs:
3362 qs = {'list': playlist_id}
3363 return self.url_result(
3364 update_url_query('https://www.youtube.com/playlist', qs),
3365 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3366
3367
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be links that carry a playlist ID.

    The link is turned into the equivalent www.youtube.com/watch URL and
    passed on to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3406
3407
class YoutubeYtUserIE(InfoExtractor):
    """Expand the "ytuser:<name>" shortcut to the /user/<name> page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3421
b05654f0 3422
3d3dddc9 3423class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
70d5c17b 3424 IE_NAME = 'youtube:favorites'
3425 IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
3426 _VALID_URL = r':ytfav(?:ou?rite)?s?'
3427 _LOGIN_REQUIRED = True
3428 _TESTS = [{
3429 'url': ':ytfav',
3430 'only_matching': True,
3431 }, {
3432 'url': ':ytfavorites',
3433 'only_matching': True,
3434 }]
3435
3436 def _real_extract(self, url):
3437 return self.url_result(
3438 'https://www.youtube.com/playlist?list=LL',
3439 ie=YoutubeTabIE.ie_key())
3440
3441
8bdd16b4 3442class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
69184e41 3443 IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
b4c08069
JMF
3444 # there doesn't appear to be a real limit, for example if you search for
3445 # 'python' you get more than 8.000.000 results
3446 _MAX_RESULTS = float('inf')
78caa52a 3447 IE_NAME = 'youtube:search'
b05654f0 3448 _SEARCH_KEY = 'ytsearch'
6c894ea1 3449 _SEARCH_PARAMS = None
9dd8e46a 3450 _TESTS = []
b05654f0 3451
6c894ea1 3452 def _entries(self, query, n):
a5c56234 3453 data = {'query': query}
6c894ea1
U
3454 if self._SEARCH_PARAMS:
3455 data['params'] = self._SEARCH_PARAMS
3456 total = 0
3457 for page_num in itertools.count(1):
a5c56234
M
3458 search = self._call_api(
3459 ep='search', video_id='query "%s"' % query, fatal=False,
3460 note='Downloading page %s' % page_num, query=data)
6c894ea1 3461 if not search:
b4c08069 3462 break
6c894ea1
U
3463 slr_contents = try_get(
3464 search,
3465 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3466 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3467 list)
3468 if not slr_contents:
a22b2fd1 3469 break
0366ae87 3470
0366ae87
M
3471 # Youtube sometimes adds promoted content to searches,
3472 # changing the index location of videos and token.
3473 # So we search through all entries till we find them.
30a074c2 3474 continuation_token = None
3475 for slr_content in slr_contents:
a96c6d15 3476 if continuation_token is None:
3477 continuation_token = try_get(
3478 slr_content,
3479 lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
3480 compat_str)
3481
30a074c2 3482 isr_contents = try_get(
3483 slr_content,
3484 lambda x: x['itemSectionRenderer']['contents'],
3485 list)
9da76d30 3486 if not isr_contents:
30a074c2 3487 continue
3488 for content in isr_contents:
3489 if not isinstance(content, dict):
3490 continue
3491 video = content.get('videoRenderer')
3492 if not isinstance(video, dict):
3493 continue
3494 video_id = video.get('videoId')
3495 if not video_id:
3496 continue
3497
3498 yield self._extract_video(video)
3499 total += 1
3500 if total == n:
3501 return
0366ae87 3502
0366ae87 3503 if not continuation_token:
6c894ea1 3504 break
0366ae87 3505 data['continuation'] = continuation_token
b05654f0 3506
6c894ea1
U
3507 def _get_n_results(self, query, n):
3508 """Get a specified number of results for a query"""
3509 return self.playlist_result(self._entries(query, n), query)
75dff0ee 3510
c9ae7b95 3511
a3dd9248 3512class YoutubeSearchDateIE(YoutubeSearchIE):
cb7fb546 3513 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
a3dd9248 3514 _SEARCH_KEY = 'ytsearchdate'
c76eb41b 3515 IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
6c894ea1 3516 _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3517
c9ae7b95 3518
386e1dd9 3519class YoutubeSearchURLIE(YoutubeSearchIE):
69184e41 3520 IE_DESC = 'YouTube.com search URLs'
386e1dd9 3521 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
3522 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
ef2f3c7f 3523 # _MAX_RESULTS = 100
3462ffa8 3524 _TESTS = [{
3525 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3526 'playlist_mincount': 5,
3527 'info_dict': {
3528 'title': 'youtube-dl test video',
3529 }
3530 }, {
3531 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3532 'only_matching': True,
3533 }]
3534
386e1dd9 3535 @classmethod
3536 def _make_valid_url(cls):
3537 return cls._VALID_URL
3538
3462ffa8 3539 def _real_extract(self, url):
386e1dd9 3540 qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
3541 query = (qs.get('search_query') or qs.get('q'))[0]
3542 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
3543 return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3544
3545
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.

    The feed page https://www.youtube.com/feed/<_FEED_NAME> is handed to
    YoutubeTabIE; a login is attempted on initialization.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3565
3566
ef2f3c7f 3567class YoutubeWatchLaterIE(InfoExtractor):
3568 IE_NAME = 'youtube:watchlater'
70d5c17b 3569 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3d3dddc9 3570 _VALID_URL = r':ytwatchlater'
bc7a9cd8 3571 _TESTS = [{
8bdd16b4 3572 'url': ':ytwatchlater',
bc7a9cd8
S
3573 'only_matching': True,
3574 }]
25f14e9f
S
3575
3576 def _real_extract(self, url):
ef2f3c7f 3577 return self.url_result(
3578 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
3462ffa8 3579
3580
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches ":ytrec" and the bare youtube.com home URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3595
1ed5b5c9 3596
25f14e9f 3597class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
70d5c17b 3598 IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
3d3dddc9 3599 _VALID_URL = r':ytsub(?:scription)?s?'
25f14e9f 3600 _FEED_NAME = 'subscriptions'
3d3dddc9 3601 _TESTS = [{
3602 'url': ':ytsubs',
3603 'only_matching': True,
3604 }, {
3605 'url': ':ytsubscriptions',
3606 'only_matching': True,
3607 }]
1ed5b5c9 3608
1ed5b5c9 3609
25f14e9f 3610class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
a5c56234
M
3611 IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
3612 _VALID_URL = r':ythis(?:tory)?'
25f14e9f 3613 _FEED_NAME = 'history'
3d3dddc9 3614 _TESTS = [{
3615 'url': ':ythistory',
3616 'only_matching': True,
3617 }]
1ed5b5c9
JMF
3618
3619
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID.

    Extraction always fails with a hint about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The literals below are concatenated into a single message
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3667
3668
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID has only 1 to 10 characters.

    Extraction always fails with a message naming the incomplete ID.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)