]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Update to ytdl-commit-654b4f4
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c224251a 27 bool_or_none,
c5e8d7af 28 clean_html,
26fe8ffe 29 dict_get,
c5e8d7af 30 ExtractorError,
b60419c5 31 format_field,
2d30521a 32 float_or_none,
dd27fd17 33 int_or_none,
94278f72 34 mimetype2ext,
6310acf5 35 parse_codecs,
7c80519c 36 parse_duration,
dca3ff4a 37 qualities,
3995d37d 38 remove_start,
cf7e015f 39 smuggle_url,
dbdaaa23 40 str_or_none,
c93d53f5 41 str_to_int,
556dbe7f 42 try_get,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
8bdd16b4 46 update_url_query,
21c340b8 47 url_or_none,
6e6bc8da 48 urlencode_postdata,
8bdd16b4 49 urljoin,
c5e8d7af
PH
50)
51
5f6a1245 52
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints used by the login flow in _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names; not referenced inside this class
    # (presumably consumed by URL patterns of other extractors).
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Wrap every video id in `ids` into a url_result for the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) so the result matches the docstring.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST `f_req` (JSON-encoded) together with the hidden login form fields to `url`."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' before JSON parsing.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the
        # challenge payloads; keep a single copy so they cannot drift apart.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) so the result matches the docstring.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is parenthesised on purpose: '%' binds tighter
            # than 'if/else', so without the parentheses any other message
            # was reported bare (possibly as None) without the prefix.
            warn('Unable to login: %s' % (
                'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED'
                else (login_msg or 'unknown error')))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as the
                    # password failure message above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED'
                        else (tfa_msg or 'unknown error')))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is already present.

        Nothing is done when a '__Secure-3PSID' cookie exists or the CONSENT
        cookie already contains 'YES'. The numeric id of a 'PENDING+<id>'
        cookie is reused when available; otherwise a random 3-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, try to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The outcome is deliberately ignored: _login() reports failures
        # through warnings and nothing further happens here either way.
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
    # Base JSON payload for _call_api(); the caller's query is merged on top.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': _YT_WEB_CLIENT_VERSION,
            }
        },
    }

    _DEFAULT_BASIC_API_HEADERS = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build a 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

        Returns None when no SAPISID cookie is set for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SHA-1 over "<timestamp> <SAPISID> <origin>".
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """POST `query` (merged over _DEFAULT_API_DATA) to the youtubei/v1/<ep> endpoint.

        `headers` supplies optional extra request headers; the mapping passed
        by the caller is copied and never modified. When a SAPISID cookie is
        available, 'Authorization' and 'X-Origin' headers are added.
        Returns whatever self._download_json returns.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)
        # Work on a copy: updating the caller's dict in place would leak the
        # content-type/Authorization headers into any dict the caller reuses.
        headers = dict(headers or {})
        headers.update({'content-type': 'application/json'})
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=headers,
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in `webpage` and parse it as JSON.

        The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
        the bare _YT_INITIAL_DATA_RE.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over `webpage`, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """Extract syncId required to download private playlists of secondary channels"""
        sync_ids = (
            try_get(data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
            or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # Falls through (returns None) when no secondary-channel id is present.

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the first `ytcfg.set({...});` object in `webpage`; '{}' is parsed when none is found."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url'-type result for YoutubeIE.

        Fields that cannot be read from `renderer` are left as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is removed first, then the leading run of digits and
        # commas is taken as the count.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
392
0c148415 393
360e1ca5 394class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 395 IE_DESC = 'YouTube.com'
bc2ca1bb 396 _INVIDIOUS_SITES = (
397 # invidious-redirect websites
398 r'(?:www\.)?redirect\.invidious\.io',
399 r'(?:(?:www|dev)\.)?invidio\.us',
400 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
401 r'(?:www\.)?invidious\.pussthecat\.org',
402 r'(?:www\.)?invidious\.048596\.xyz',
403 r'(?:www\.)?invidious\.zee\.li',
404 r'(?:www\.)?vid\.puffyan\.us',
405 r'(?:(?:www|au)\.)?ytprivate\.com',
406 r'(?:www\.)?invidious\.namazso\.eu',
407 r'(?:www\.)?invidious\.ethibox\.fr',
408 r'(?:www\.)?inv\.skyn3t\.in',
409 r'(?:www\.)?invidious\.himiko\.cloud',
410 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
411 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
412 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
413 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
414 # youtube-dl invidious instances list
415 r'(?:(?:www|no)\.)?invidiou\.sh',
416 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
417 r'(?:www\.)?invidious\.kabi\.tk',
418 r'(?:www\.)?invidious\.13ad\.de',
419 r'(?:www\.)?invidious\.mastodon\.host',
420 r'(?:www\.)?invidious\.zapashcanon\.fr',
421 r'(?:www\.)?invidious\.kavin\.rocks',
422 r'(?:www\.)?invidious\.tube',
423 r'(?:www\.)?invidiou\.site',
424 r'(?:www\.)?invidious\.site',
425 r'(?:www\.)?invidious\.xyz',
426 r'(?:www\.)?invidious\.nixnet\.xyz',
427 r'(?:www\.)?invidious\.drycat\.fr',
428 r'(?:www\.)?tube\.poal\.co',
429 r'(?:www\.)?tube\.connect\.cafe',
430 r'(?:www\.)?vid\.wxzm\.sx',
431 r'(?:www\.)?vid\.mint\.lgbt',
432 r'(?:www\.)?yewtu\.be',
433 r'(?:www\.)?yt\.elukerio\.org',
434 r'(?:www\.)?yt\.lelux\.fi',
435 r'(?:www\.)?invidious\.ggc-project\.de',
436 r'(?:www\.)?yt\.maisputain\.ovh',
437 r'(?:www\.)?invidious\.toot\.koeln',
438 r'(?:www\.)?invidious\.fdn\.fr',
439 r'(?:www\.)?watch\.nettohikari\.com',
440 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
441 r'(?:www\.)?qklhadlycap4cnod\.onion',
442 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
443 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
444 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
445 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
446 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
447 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
448 )
cb7dfeea 449 _VALID_URL = r"""(?x)^
c5e8d7af 450 (
edb53e2d 451 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 452 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
453 (?:www\.)?deturl\.com/www\.youtube\.com|
454 (?:www\.)?pwnyoutube\.com|
455 (?:www\.)?hooktube\.com|
456 (?:www\.)?yourepeat\.com|
457 tube\.majestyc\.net|
458 %(invidious)s|
459 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
460 (?:.*?\#/)? # handle anchor (#/) redirect urls
461 (?: # the various things that can precede the ID:
ac7553d0 462 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 463 |(?: # or the v= param in all its forms
f7000f3a 464 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 465 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 466 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
467 v=
468 )
f4b05232 469 ))
cbaed4bb
S
470 |(?:
471 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
472 vid\.plus| # or vid.plus/xxxx
473 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 474 %(invidious)s
cbaed4bb 475 )/
edb53e2d 476 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 477 )
c5e8d7af 478 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 479 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
480 (?!.*?\blist=
481 (?:
482 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
483 WL # WL are handled by the watch later IE
484 )
485 )
c5e8d7af 486 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 487 $""" % {
488 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
489 'invidious': '|'.join(_INVIDIOUS_SITES),
490 }
e40c758c 491 _PLAYER_INFO_RE = (
cc2db878 492 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
493 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 494 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 495 )
2c62dc26 496 _formats = {
c2d3cb4c 497 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
498 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
499 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
500 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
501 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
502 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
503 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
504 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 505 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 506 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
507 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
508 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
509 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
510 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
511 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 512 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 513 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
514 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 515
516
517 # 3D videos
c2d3cb4c 518 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
519 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
520 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
521 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 522 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
523 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
524 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 525
96fb5605 526 # Apple HTTP Live Streaming
11f12195 527 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 528 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
529 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
530 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
531 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
532 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 533 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
534 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
535
536 # DASH mp4 video
d23028a8
S
537 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
538 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
539 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
540 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
541 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 542 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
543 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
544 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
545 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
546 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
547 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
548 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 549
f6f1fc92 550 # Dash mp4 audio
d23028a8
S
551 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
552 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
553 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
554 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
555 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
556 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
557 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
558
559 # Dash webm
d23028a8
S
560 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
561 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
562 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
563 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
564 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
565 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
566 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
567 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
568 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
569 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
570 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
571 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
572 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
573 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
574 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 575 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
576 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
577 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
578 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
579 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
580 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
581 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
582
583 # Dash webm audio
d23028a8
S
584 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
585 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 586
0857baad 587 # Dash webm audio with opus inside
d23028a8
S
588 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
589 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
590 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 591
ce6b9a2d
PH
592 # RTMP (unnamed)
593 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
594
595 # av01 video only formats sometimes served with "unknown" codecs
596 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
597 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
598 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
599 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 600 }
29f7c58a 601 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 602
fd5c4aab
S
603 _GEO_BYPASS = False
604
78caa52a 605 IE_NAME = 'youtube'
2eb88d95
PH
606 _TESTS = [
607 {
2d3d2997 608 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
609 'info_dict': {
610 'id': 'BaW_jenozKc',
611 'ext': 'mp4',
3867038a 612 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
613 'uploader': 'Philipp Hagemeister',
614 'uploader_id': 'phihag',
ec85ded8 615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
616 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
617 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 618 'upload_date': '20121002',
3867038a 619 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 620 'categories': ['Science & Technology'],
3867038a 621 'tags': ['youtube-dl'],
556dbe7f 622 'duration': 10,
dbdaaa23 623 'view_count': int,
3e7c1224
PH
624 'like_count': int,
625 'dislike_count': int,
7c80519c 626 'start_time': 1,
297a564b 627 'end_time': 9,
2eb88d95 628 }
0e853ca4 629 },
fccd3771 630 {
4bc3a23e
PH
631 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
632 'note': 'Embed-only video (#1746)',
633 'info_dict': {
634 'id': 'yZIXLfi8CZQ',
635 'ext': 'mp4',
636 'upload_date': '20120608',
637 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
638 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
639 'uploader': 'SET India',
94bfcd23 640 'uploader_id': 'setindia',
ec85ded8 641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 642 'age_limit': 18,
545cc85d 643 },
644 'skip': 'Private video',
fccd3771 645 },
11b56058 646 {
8bdd16b4 647 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
648 'note': 'Use the first video ID in the URL',
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 656 'upload_date': '20121002',
3867038a 657 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 658 'categories': ['Science & Technology'],
3867038a 659 'tags': ['youtube-dl'],
556dbe7f 660 'duration': 10,
dbdaaa23 661 'view_count': int,
11b56058
PM
662 'like_count': int,
663 'dislike_count': int,
34a7de29
S
664 },
665 'params': {
666 'skip_download': True,
667 },
11b56058 668 },
dd27fd17 669 {
2d3d2997 670 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
671 'note': '256k DASH audio (format 141) via DASH manifest',
672 'info_dict': {
673 'id': 'a9LDPn-MO4I',
674 'ext': 'm4a',
675 'upload_date': '20121002',
676 'uploader_id': '8KVIDEO',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
678 'description': '',
679 'uploader': '8KVIDEO',
680 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 681 },
4bc3a23e
PH
682 'params': {
683 'youtube_include_dash_manifest': True,
684 'format': '141',
4919603f 685 },
de3c7fe0 686 'skip': 'format 141 not served anymore',
dd27fd17 687 },
8bdd16b4 688 # DASH manifest with encrypted signature
689 {
690 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
691 'info_dict': {
692 'id': 'IB3lcPjvWLA',
693 'ext': 'm4a',
694 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
695 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
696 'duration': 244,
697 'uploader': 'AfrojackVEVO',
698 'uploader_id': 'AfrojackVEVO',
699 'upload_date': '20131011',
cc2db878 700 'abr': 129.495,
8bdd16b4 701 },
702 'params': {
703 'youtube_include_dash_manifest': True,
704 'format': '141/bestaudio[ext=m4a]',
705 },
706 },
aa79ac0c
PH
707 # Controversy video
708 {
709 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
710 'info_dict': {
711 'id': 'T4XJQO3qol8',
712 'ext': 'mp4',
556dbe7f 713 'duration': 219,
aa79ac0c 714 'upload_date': '20100909',
4fe54c12 715 'uploader': 'Amazing Atheist',
aa79ac0c 716 'uploader_id': 'TheAmazingAtheist',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 718 'title': 'Burning Everyone\'s Koran',
545cc85d 719 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 720 }
c522adb1 721 },
dd2d55f1 722 # Normal age-gate video (embed allowed)
c522adb1 723 {
2d3d2997 724 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
725 'info_dict': {
726 'id': 'HtVdAasjOgU',
727 'ext': 'mp4',
728 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 729 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 730 'duration': 142,
c522adb1
JMF
731 'uploader': 'The Witcher',
732 'uploader_id': 'WitcherGame',
ec85ded8 733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 734 'upload_date': '20140605',
34952f09 735 'age_limit': 18,
c522adb1
JMF
736 },
737 },
8bdd16b4 738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
739 # YouTube Red ad is not captured for creator
740 {
741 'url': '__2ABJjxzNo',
742 'info_dict': {
743 'id': '__2ABJjxzNo',
744 'ext': 'mp4',
745 'duration': 266,
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 749 'creator': 'deadmau5',
750 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 753 'alt_title': 'Some Chords',
8bdd16b4 754 },
755 'expected_warnings': [
756 'DASH manifest missing',
757 ]
758 },
067aa17e 759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
760 {
761 'url': 'lqQg6PlCWgI',
762 'info_dict': {
763 'id': 'lqQg6PlCWgI',
764 'ext': 'mp4',
556dbe7f 765 'duration': 6085,
90227264 766 'upload_date': '20150827',
cbe2bd91 767 'uploader_id': 'olympic',
ec85ded8 768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 770 'uploader': 'Olympic',
cbe2bd91
PH
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
772 },
773 'params': {
774 'skip_download': 'requires avconv',
e52a40ab 775 }
cbe2bd91 776 },
6271f1ca
PH
777 # Non-square pixels
778 {
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
780 'info_dict': {
781 'id': '_b-2C3KPAM0',
782 'ext': 'mp4',
783 'stretched_ratio': 16 / 9.,
556dbe7f 784 'duration': 85,
6271f1ca
PH
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
ec85ded8 787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 789 'uploader': '孫ᄋᄅ',
6271f1ca
PH
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
791 },
06b491eb
S
792 },
793 # url_encoded_fmt_stream_map is empty string
794 {
795 'url': 'qEJwOuvDf7I',
796 'info_dict': {
797 'id': 'qEJwOuvDf7I',
f57b7835 798 'ext': 'webm',
06b491eb
S
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
800 'description': '',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
804 },
805 'params': {
806 'skip_download': 'requires avconv',
e323cf3f
S
807 },
808 'skip': 'This live event has ended.',
06b491eb 809 },
067aa17e 810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
811 {
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
813 'info_dict': {
814 'id': 'FIl7x6_3R5Y',
eb6793ba 815 'ext': 'webm',
da77d856
S
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 818 'duration': 220,
da77d856
S
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
ec85ded8 821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 822 'uploader': 'dorappi2000',
eb6793ba 823 'formats': 'mincount:31',
da77d856 824 },
eb6793ba 825 'skip': 'not actual anymore',
2ee8f5d8 826 },
8a1a26ce
YCH
827 # DASH manifest with segment_list
828 {
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
831 'info_dict': {
832 'id': 'CsmdDsKjzN8',
833 'ext': 'mp4',
17ee98e1 834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
839 },
840 'params': {
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
be49068d
S
843 },
844 'skip': 'This live event has ended.',
2ee8f5d8 845 },
cf7e015f
S
846 {
847 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 848 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 849 'info_dict': {
545cc85d 850 'id': 'jvGDaLqkpTg',
851 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
852 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
853 },
854 'playlist': [{
855 'info_dict': {
545cc85d 856 'id': 'jvGDaLqkpTg',
cf7e015f 857 'ext': 'mp4',
545cc85d 858 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
859 'description': 'md5:e03b909557865076822aa169218d6a5d',
860 'duration': 10643,
861 'upload_date': '20161111',
862 'uploader': 'Team PGP',
863 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
865 },
866 }, {
867 'info_dict': {
545cc85d 868 'id': '3AKt1R1aDnw',
cf7e015f 869 'ext': 'mp4',
545cc85d 870 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
871 'description': 'md5:e03b909557865076822aa169218d6a5d',
872 'duration': 10991,
873 'upload_date': '20161111',
874 'uploader': 'Team PGP',
875 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
876 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
877 },
878 }, {
879 'info_dict': {
545cc85d 880 'id': 'RtAMM00gpVc',
cf7e015f 881 'ext': 'mp4',
545cc85d 882 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
883 'description': 'md5:e03b909557865076822aa169218d6a5d',
884 'duration': 10995,
885 'upload_date': '20161111',
886 'uploader': 'Team PGP',
887 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
888 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
889 },
890 }, {
891 'info_dict': {
545cc85d 892 'id': '6N2fdlP3C5U',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10990,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }],
903 'params': {
904 'skip_download': True,
905 },
cbaed4bb 906 },
f9f49d87 907 {
067aa17e 908 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
909 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
910 'info_dict': {
911 'id': 'gVfLd0zydlo',
912 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
913 },
914 'playlist_count': 2,
be49068d 915 'skip': 'Not multifeed anymore',
f9f49d87 916 },
cbaed4bb 917 {
2d3d2997 918 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 919 'only_matching': True,
0e49d9a6 920 },
6d4fc66b 921 {
2d3d2997 922 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
923 'only_matching': True,
924 },
0e49d9a6 925 {
067aa17e 926 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 927 # Also tests cut-off URL expansion in video description (see
067aa17e
S
928 # https://github.com/ytdl-org/youtube-dl/issues/1892,
929 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
930 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
931 'info_dict': {
932 'id': 'lsguqyKfVQg',
933 'ext': 'mp4',
934 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 935 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 936 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 937 'duration': 133,
0e49d9a6
LL
938 'upload_date': '20151119',
939 'uploader_id': 'IronSoulElf',
ec85ded8 940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 941 'uploader': 'IronSoulElf',
eb6793ba
S
942 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
943 'track': 'Dark Walk - Position Music',
944 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 945 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
946 },
947 'params': {
948 'skip_download': True,
949 },
950 },
61f92af1 951 {
067aa17e 952 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
953 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
954 'only_matching': True,
955 },
313dfc45
LL
956 {
957 # Video with yt:stretch=17:0
958 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
959 'info_dict': {
960 'id': 'Q39EVAstoRM',
961 'ext': 'mp4',
962 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
963 'description': 'md5:ee18a25c350637c8faff806845bddee9',
964 'upload_date': '20151107',
965 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
966 'uploader': 'CH GAMER DROID',
967 },
968 'params': {
969 'skip_download': True,
970 },
be49068d 971 'skip': 'This video does not exist.',
313dfc45 972 },
7caf9830
S
973 {
974 # Video licensed under Creative Commons
975 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
976 'info_dict': {
977 'id': 'M4gD1WSo5mA',
978 'ext': 'mp4',
979 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
980 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 981 'duration': 721,
7caf9830
S
982 'upload_date': '20150127',
983 'uploader_id': 'BerkmanCenter',
ec85ded8 984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 985 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
986 'license': 'Creative Commons Attribution license (reuse allowed)',
987 },
988 'params': {
989 'skip_download': True,
990 },
991 },
fd050249
S
992 {
993 # Channel-like uploader_url
994 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
995 'info_dict': {
996 'id': 'eQcmzGIKrzg',
997 'ext': 'mp4',
998 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 999 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1000 'duration': 4060,
fd050249 1001 'upload_date': '20151119',
eb6793ba 1002 'uploader': 'Bernie Sanders',
fd050249 1003 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1005 'license': 'Creative Commons Attribution license (reuse allowed)',
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
1010 },
040ac686
S
1011 {
1012 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1013 'only_matching': True,
7f29cf54
S
1014 },
1015 {
067aa17e 1016 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1017 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1018 'only_matching': True,
6496ccb4
S
1019 },
1020 {
1021 # Rental video preview
1022 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1023 'info_dict': {
1024 'id': 'uGpuVWrhIzE',
1025 'ext': 'mp4',
1026 'title': 'Piku - Trailer',
1027 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1028 'upload_date': '20150811',
1029 'uploader': 'FlixMatrix',
1030 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1032 'license': 'Standard YouTube License',
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
eb6793ba 1037 'skip': 'This video is not available.',
022a5d66 1038 },
12afdc2a
S
1039 {
1040 # YouTube Red video with episode data
1041 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1042 'info_dict': {
1043 'id': 'iqKdEhx-dD4',
1044 'ext': 'mp4',
1045 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1046 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1047 'duration': 2085,
12afdc2a
S
1048 'upload_date': '20170118',
1049 'uploader': 'Vsauce',
1050 'uploader_id': 'Vsauce',
1051 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1052 'series': 'Mind Field',
1053 'season_number': 1,
1054 'episode_number': 1,
1055 },
1056 'params': {
1057 'skip_download': True,
1058 },
1059 'expected_warnings': [
1060 'Skipping DASH manifest',
1061 ],
1062 },
c7121fa7
S
1063 {
1064 # The following content has been identified by the YouTube community
1065 # as inappropriate or offensive to some audiences.
1066 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1067 'info_dict': {
1068 'id': '6SJNVb0GnPI',
1069 'ext': 'mp4',
1070 'title': 'Race Differences in Intelligence',
1071 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1072 'duration': 965,
1073 'upload_date': '20140124',
1074 'uploader': 'New Century Foundation',
1075 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1077 },
1078 'params': {
1079 'skip_download': True,
1080 },
545cc85d 1081 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1082 },
022a5d66
S
1083 {
1084 # itag 212
1085 'url': '1t24XAntNCY',
1086 'only_matching': True,
fd5c4aab
S
1087 },
1088 {
1089 # geo restricted to JP
1090 'url': 'sJL6WA-aGkQ',
1091 'only_matching': True,
1092 },
cd5a74a2
S
1093 {
1094 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1095 'only_matching': True,
1096 },
bc2ca1bb 1097 {
1098 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1099 'only_matching': True,
1100 },
1101 {
1102 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1103 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1104 'only_matching': True,
1105 },
825cd268
RA
1106 {
1107 # DRM protected
1108 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1109 'only_matching': True,
4fe54c12
S
1110 },
1111 {
1112 # Video with unsupported adaptive stream type formats
1113 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1114 'info_dict': {
1115 'id': 'Z4Vy8R84T1U',
1116 'ext': 'mp4',
1117 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1118 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1119 'duration': 433,
1120 'upload_date': '20130923',
1121 'uploader': 'Amelia Putri Harwita',
1122 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1124 'formats': 'maxcount:10',
1125 },
1126 'params': {
1127 'skip_download': True,
1128 'youtube_include_dash_manifest': False,
1129 },
5429d6a9 1130 'skip': 'not actual anymore',
5caabd3c 1131 },
1132 {
822b9d9c 1133 # Youtube Music Auto-generated description
5caabd3c 1134 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1135 'info_dict': {
1136 'id': 'MgNrAu2pzNs',
1137 'ext': 'mp4',
1138 'title': 'Voyeur Girl',
1139 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1140 'upload_date': '20190312',
5429d6a9
S
1141 'uploader': 'Stephen - Topic',
1142 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1143 'artist': 'Stephen',
1144 'track': 'Voyeur Girl',
1145 'album': 'it\'s too much love to know my dear',
1146 'release_date': '20190313',
1147 'release_year': 2019,
1148 },
1149 'params': {
1150 'skip_download': True,
1151 },
1152 },
66b48727
RA
1153 {
1154 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1155 'only_matching': True,
1156 },
011e75e6
S
1157 {
1158 # invalid -> valid video id redirection
1159 'url': 'DJztXj2GPfl',
1160 'info_dict': {
1161 'id': 'DJztXj2GPfk',
1162 'ext': 'mp4',
1163 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1164 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1165 'upload_date': '20090125',
1166 'uploader': 'Prochorowka',
1167 'uploader_id': 'Prochorowka',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1169 'artist': 'Panjabi MC',
1170 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1171 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1172 },
1173 'params': {
1174 'skip_download': True,
1175 },
545cc85d 1176 'skip': 'Video unavailable',
ea74e00b
DP
1177 },
1178 {
1179 # empty description results in an empty string
1180 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1181 'info_dict': {
1182 'id': 'x41yOUIvK2k',
1183 'ext': 'mp4',
1184 'title': 'IMG 3456',
1185 'description': '',
1186 'upload_date': '20170613',
1187 'uploader_id': 'ElevageOrVert',
1188 'uploader': 'ElevageOrVert',
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
a0566bbf 1194 {
29f7c58a 1195 # with '};' inside yt initial data (see [1])
1196 # see [2] for an example with '};' inside ytInitialPlayerResponse
1197 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1198 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1199 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1200 'info_dict': {
1201 'id': 'CHqg6qOn4no',
1202 'ext': 'mp4',
1203 'title': 'Part 77 Sort a list of simple types in c#',
1204 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1205 'upload_date': '20130831',
1206 'uploader_id': 'kudvenkat',
1207 'uploader': 'kudvenkat',
1208 },
1209 'params': {
1210 'skip_download': True,
1211 },
1212 },
29f7c58a 1213 {
1214 # another example of '};' in ytInitialData
1215 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1216 'only_matching': True,
1217 },
1218 {
1219 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1220 'only_matching': True,
1221 },
545cc85d 1222 {
cc2db878 1223 # https://github.com/ytdl-org/youtube-dl/pull/28094
1224 'url': 'OtqTfy26tG0',
1225 'info_dict': {
1226 'id': 'OtqTfy26tG0',
1227 'ext': 'mp4',
1228 'title': 'Burn Out',
1229 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1230 'upload_date': '20141120',
1231 'uploader': 'The Cinematic Orchestra - Topic',
1232 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1234 'artist': 'The Cinematic Orchestra',
1235 'track': 'Burn Out',
1236 'album': 'Every Day',
1237 'release_data': None,
1238 'release_year': None,
1239 },
1240 'params': {
1241 'skip_download': True,
1242 },
545cc85d 1243 },
bc2ca1bb 1244 {
1245 # controversial video, only works with bpctr when authenticated with cookies
1246 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1247 'only_matching': True,
1248 },
2eb88d95
PH
1249 ]
1250
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the base extractor and start with empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature shape) -> callable that transforms a signature
    self._player_cache = {}
    # player id -> downloaded player JS source
    self._code_cache = {}
e0df6211 1255
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    The lengths are joined with '.', so 'abc.de' yields '3.2'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1259
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first _PLAYER_INFO_RE pattern that matches.

    Raises ExtractorError when none of the patterns matches player_url.
    """
    # Lazy generator: patterns are tried in order and searching stops at
    # the first hit.
    attempts = (
        re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c
S
1269
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like example_sig.

    The filesystem cache is consulted first; on a miss the player JS is
    downloaded (once per player id), parsed, and the resulting index
    permutation is stored back into the cache.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # The cache key doubles as a file name, so it must not contain path parts
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cache_spec = cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        def apply_cached_spec(s):
            # Rebuild the output by picking characters at the cached indices
            return ''.join(s[i] for i in cache_spec)
        return apply_cached_spec

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose i-th character has code point i; the code points
    # of the output are then exactly the source indices.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in res(probe)]

    cache.store('youtube-sigfuncs', func_id, index_spec)
    return res
1296
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function `func`.

    `func` is probed with a string whose i-th character has code point i,
    so the code points of its output give the source index of every output
    character. Those indices are rendered as a sum of index/slice
    expressions on `s` and shown via self.to_screen().
    """
    def gen_sig_code(idxs):
        # Yields 's[a]' / 's[a:b:step]' fragments; runs of indices that
        # change by a constant +1 or -1 are collapsed into one slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # end is inclusive here; a negative stop means "up to the edge"
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush the trailing element or run.
        # NOTE(review): `i` is unbound if idxs has fewer than two items.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1335
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function found in jscode.

    The function name is located with the regexes below (the 'sig' group),
    the function is extracted with JSInterpreter, and the returned callable
    invokes it with the signature as its single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the pattern below is identical to the one above
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with a one-element argument list
    return lambda s: initial_function([s])
1359
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The transform function is looked up (or extracted and memoised) per
    (player URL, signature shape) pair. Any failure while obtaining or
    running it is re-raised as ExtractorError carrying the traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and site-relative player URLs
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            transform = self._player_cache[cache_key]
        else:
            transform = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = transform
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(transform, s)
        return transform(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1386
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response, if present.

    The URL's query is extended with 'ver' and a random 'cpn' value before
    the request is made; a failed request is not fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1411
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in an HTML page.

    Returns a list of strings gathered from three sources, in this order:
    player URLs from iframe/embed/object/SWFObject markup, the values of
    lazyYT `data-youtube-id` attributes, and the values of `data-video_id`
    attributes on "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which only
    # matched a literal ':' after an optional '(' and therefore never
    # matched a real `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, value) tuples; only the value is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1443
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1448
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of the _VALID_URL match against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    matched = re.compile(cls._VALID_URL, re.VERBOSE).match(url)
    if matched is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return matched.group(2)
1456
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Each chapter ends where the next one starts; the last one ends at
    duration. Chapters without a usable start or end time are dropped.
    Returns None when data holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # timeRangeStartMillis divided by 1000, or None when unavailable
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = chapter_time(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1496
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by regex in webpage and parse it.

    The regex is tried first with _YT_INITIAL_BOUNDARY_RE appended, then
    on its own; '{}' is parsed when neither matches. Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1501
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dict entries in runs.

    Returns None when no entry contributes any text.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1515
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment dict.

    Returns None when the renderer carries no 'commentId'. The 'parent'
    field is set to 'root' when no parent is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def joined_runs(key):
        # Join the text runs stored under comment_renderer[key]['runs']
        runs = try_get(comment_renderer, lambda x: x[key]['runs']) or []
        return self._join_text_entries(runs)

    text = joined_runs('contentText') or ''
    time_text = joined_runs('publishedTimeText')

    author = try_get(
        comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'],
        compat_str)
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'],
        compat_str)
    author_is_uploader = try_get(
        comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)

    # voteCount.simpleText is tried before likeCount; default to 0
    raw_votes = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'],
         lambda x: x['likeCount']),
        compat_str)
    votes = str_to_int(raw_votes) or 0

    return {
        'id': comment_id,
        'text': text,
        # TODO: This should be parsed to timestamp
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1549
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from a continuation.

    Yields comment dicts (see _extract_comment).  For the root call
    (``parent`` is None) the estimated total number of comments is also
    yielded once, as a plain int, when it can be parsed.

    ``session_token_list`` is a one-element list holding the XSRF token; it
    is updated in place whenever a response carries a fresh token, so the
    token is shared with the recursive calls made for reply threads.
    ``comment_counts`` is the shared progress state
    ``[comments so far, estimated total, current reply thread #]``.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here, not a second getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]
    headers = self._DEFAULT_BASIC_API_HEADERS.copy()

    # TODO: Generalize the download code with TabIE
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = parent is None

    known_continuation_renderers = {
        'itemSectionContinuation': extract_thread,
        'commentRepliesContinuation': extract_thread
    }

    for page_num in itertools.count(0):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = "Downloading initial comment continuation page"
                    else:
                        note_prefix = " Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = "%sDownloading comment%s page %d %s" % (
                        " " if parent else "",
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning("Assumed end of comments (received HTTP Error 413)")
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + " (this API is probably deprecated)"
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # The reply may be a list (see the x[1] getter above), in which
                # case it has no top-level error fields to inspect
                browse_info = browse if isinstance(browse, dict) else {}

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse_info.get('reload'):
                    raise ExtractorError("Invalid or missing params in continuation request", expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse_info.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # Only trust the estimate if it really is a number; a None
                    # here would break the '%d' progress messages
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen("Downloading ~%d comments" % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # Nothing we know how to follow on this page. `continuation` was
            # not advanced, so stop instead of requesting the same page forever
            break
1735
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks ``contents`` and downloads the comments of each entry's first
    ``itemSectionRenderer``.  ``contents`` may be None or empty, in which
    case no comments are returned.

    Returns a dict with the list of ``comments`` and their ``comment_count``.
    """
    comments = []
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    estimated_total = 0
    for entry in contents or []:
        if not isinstance(entry, dict):
            continue
        for key, renderer in entry.items():
            if key not in known_entry_comment_renderers:
                continue

            comment_iter = self._comment_entries(
                renderer,
                identity_token=self._extract_identity_token(webpage, item_id=video_id),
                account_syncid=self._extract_account_syncid(ytcfg),
                session_token_list=[xsrf_token])

            for comment in comment_iter:
                # A bare int is the estimated total, not a comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
            break
    self.to_screen("Downloaded %d/%d comments" % (len(comments), estimated_total))
    return {
        'comments': comments,
        'comment_count': len(comments),
    }
1765
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the watch page (non-fatally), obtains the player response
    either from the page or via the `player` API, and builds formats,
    thumbnails, subtitles and metadata from it.  Extra metadata (chapters,
    like counts, music info, availability) comes from the page's initial
    data or the `next` API.

    Returns a playlist result for multifeed videos (unless suppressed), a
    URL result pointing at the trailer when one is offered, and otherwise
    the video info dict.  Raises ExtractorError when no formats are found
    and YouTube reports a reason.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') == 'Sign in to confirm your age':
        pr = self._parse_json(try_get(compat_parse_qs(
            self._download_webpage(
                base_url + 'get_video_info', video_id,
                'Refetching age-gated info webpage',
                'unable to download video info webpage', query={
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                }, fatal=False)),
            lambda x: x['player_response'][0],
            compat_str) or '{}', video_id)
        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text nodes carry either `simpleText` or a list of `runs`; tolerate
        # nodes that have neither instead of raising KeyError
        if not x:
            return
        return x.get('simpleText') or ''.join(
            run.get('text') or '' for run in (x.get('runs') or []))

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats = []
    itags = []
    itag_qualities = {}
    player_url = None
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    streaming_data = player_response.get('streamingData') or {}
    # Build a new list so that the player response is not modified in place
    streaming_formats = (
        (streaming_data.get('formats') or [])
        + (streaming_data.get('adaptiveFormats') or []))
    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        quality = fmt.get('quality')
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                if not webpage:
                    continue
                player_url = self._search_regex(
                    r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                    webpage, 'player URL', fatal=False)
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
        }
        mimetype = fmt.get('mimeType')
        if mimetype:
            mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
            if mobj:
                dct['ext'] = mimetype2ext(mobj.group(1))
                dct.update(parse_codecs(mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            itag = self._search_regex(
                r'/itag/(\d+)', f['url'], 'itag', default=None)
            if itag:
                f['format_id'] = itag
            formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    # Not actually useful since the sorting is already done with "quality,res,fps,codec"
                    # but kept to maintain feature parity (and code similarity) with youtube-dl
                    # Remove if this causes any issues with sorting in future
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # There may be a subreason without a main reason
            if reason:
                reason += '\n' + subreason
            else:
                reason = subreason
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if not mobj:
                # Malformed stretch keyword; nothing to apply
                continue
            # NB: float is intentional, it forces true division on Python 2
            w, h = (float(v) for v in mobj.groups())
            if w > 0 and h > 0:
                ratio = w / h
                for f in formats:
                    if f.get('vcodec') != 'none':
                        f['stretched_ratio'] = ratio

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # Neither container had thumbnails; fall back to the page metadata
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    owner_profile_url = microformat.get('ownerProfileUrl')

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # `author` may be missing; do not abort extraction over it
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, query):
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                })
            container[lang_code] = lang_subs

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, {})
                continue
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the captured album, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    if not is_live:
        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            info['subtitles']['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

    # Watch-next result entries; also handed to the comment extractor below,
    # so it must always be bound to a list
    contents = []
    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            for engagement_panel in (initial_data.get('engagementPanels') or []):
                panel_contents = try_get(
                    engagement_panel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not panel_contents:
                    continue

                def chapter_time(mmlir):
                    # `mmlir` is None when the next entry is not a marker item
                    return parse_duration(
                        get_text((mmlir or {}).get('timeDescription')))

                chapters = []
                for next_num, content in enumerate(panel_contents, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    end_time = chapter_time(try_get(
                        panel_contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
                        if next_num < len(panel_contents) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'],
                    compat_str)
                if sbr_tooltip:
                    # Expected form is "<likes> / <dislikes>"; ignore anything else
                    sentiment_counts = sbr_tooltip.split(' / ')
                    if len(sentiment_counts) == 2:
                        info.update({
                            'like_count': str_to_int(sentiment_counts[0]),
                            'dislike_count': str_to_int(sentiment_counts[1]),
                        })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    mrr_contents_text = get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        # `contents` already holds the watch-next result entries (see above)
        for content in contents:
            badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
            for badge in badges or []:
                label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
                if label.lower() == 'members only':
                    is_membersonly = True
                    break
                elif label.lower() == 'premium':
                    is_premium = True
                    break
            if is_membersonly or is_premium:
                break

    # TODO: Add this for playlists
    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self._downloader.params.get('writeannotations', False)
    get_comments = self._downloader.params.get('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Comments are fetched lazily, after the rest of the extraction
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 2363
5f6a1245 2364
8bdd16b4 2365class YoutubeTabIE(YoutubeBaseInfoExtractor):
2366 IE_DESC = 'YouTube.com tab'
70d5c17b 2367 _VALID_URL = r'''(?x)
2368 https?://
2369 (?:\w+\.)?
2370 (?:
2371 youtube(?:kids)?\.com|
2372 invidio\.us
2373 )/
2374 (?:
2375 (?:channel|c|user)/|
2376 (?P<not_channel>
9ba5705a 2377 feed/|hashtag/|
70d5c17b 2378 (?:playlist|watch)\?.*?\blist=
2379 )|
29f7c58a 2380 (?!(?:%s)\b) # Direct URLs
70d5c17b 2381 )
2382 (?P<id>[^/?\#&]+)
2383 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2384 IE_NAME = 'youtube:tab'
2385
81127aa5 2386 _TESTS = [{
8bdd16b4 2387 # playlists, multipage
2388 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2389 'playlist_mincount': 94,
2390 'info_dict': {
2391 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2392 'title': 'Игорь Клейнер - Playlists',
2393 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2394 'uploader': 'Игорь Клейнер',
2395 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2396 },
2397 }, {
2398 # playlists, multipage, different order
2399 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2400 'playlist_mincount': 94,
2401 'info_dict': {
2402 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2403 'title': 'Игорь Клейнер - Playlists',
2404 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2405 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2406 'uploader': 'Игорь Клейнер',
8bdd16b4 2407 },
2408 }, {
2409 # playlists, singlepage
2410 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2411 'playlist_mincount': 4,
2412 'info_dict': {
2413 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2414 'title': 'ThirstForScience - Playlists',
2415 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2416 'uploader': 'ThirstForScience',
2417 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2418 }
2419 }, {
2420 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2421 'only_matching': True,
2422 }, {
2423 # basic, single video playlist
0e30a7b9 2424 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2425 'info_dict': {
0e30a7b9 2426 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2427 'uploader': 'Sergey M.',
2428 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2429 'title': 'youtube-dl public playlist',
81127aa5 2430 },
0e30a7b9 2431 'playlist_count': 1,
9291475f 2432 }, {
8bdd16b4 2433 # empty playlist
0e30a7b9 2434 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2435 'info_dict': {
0e30a7b9 2436 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2437 'uploader': 'Sergey M.',
2438 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2439 'title': 'youtube-dl empty playlist',
9291475f
PH
2440 },
2441 'playlist_count': 0,
2442 }, {
8bdd16b4 2443 # Home tab
2444 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2445 'info_dict': {
8bdd16b4 2446 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2447 'title': 'lex will - Home',
2448 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2449 'uploader': 'lex will',
2450 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2451 },
8bdd16b4 2452 'playlist_mincount': 2,
9291475f 2453 }, {
8bdd16b4 2454 # Videos tab
2455 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2456 'info_dict': {
8bdd16b4 2457 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2458 'title': 'lex will - Videos',
2459 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2460 'uploader': 'lex will',
2461 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2462 },
8bdd16b4 2463 'playlist_mincount': 975,
9291475f 2464 }, {
8bdd16b4 2465 # Videos tab, sorted by popular
2466 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2467 'info_dict': {
8bdd16b4 2468 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2469 'title': 'lex will - Videos',
2470 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2471 'uploader': 'lex will',
2472 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2473 },
8bdd16b4 2474 'playlist_mincount': 199,
9291475f 2475 }, {
8bdd16b4 2476 # Playlists tab
2477 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2478 'info_dict': {
8bdd16b4 2479 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2480 'title': 'lex will - Playlists',
2481 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2482 'uploader': 'lex will',
2483 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2484 },
8bdd16b4 2485 'playlist_mincount': 17,
ac7553d0 2486 }, {
8bdd16b4 2487 # Community tab
2488 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2489 'info_dict': {
8bdd16b4 2490 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2491 'title': 'lex will - Community',
2492 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2493 'uploader': 'lex will',
2494 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2495 },
2496 'playlist_mincount': 18,
87dadd45 2497 }, {
8bdd16b4 2498 # Channels tab
2499 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2500 'info_dict': {
8bdd16b4 2501 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2502 'title': 'lex will - Channels',
2503 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2504 'uploader': 'lex will',
2505 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2506 },
deaec5af 2507 'playlist_mincount': 12,
6b08cdf6 2508 }, {
a0566bbf 2509 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2510 'only_matching': True,
2511 }, {
a0566bbf 2512 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2513 'only_matching': True,
2514 }, {
a0566bbf 2515 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2516 'only_matching': True,
2517 }, {
2518 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2519 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2520 'info_dict': {
2521 'title': '29C3: Not my department',
2522 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2523 'uploader': 'Christiaan008',
2524 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2525 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2526 },
2527 'playlist_count': 96,
2528 }, {
2529 'note': 'Large playlist',
2530 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2531 'info_dict': {
8bdd16b4 2532 'title': 'Uploads from Cauchemar',
2533 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2534 'uploader': 'Cauchemar',
2535 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2536 },
8bdd16b4 2537 'playlist_mincount': 1123,
2538 }, {
2539 # even larger playlist, 8832 videos
2540 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2541 'only_matching': True,
4b7df0d3
JMF
2542 }, {
2543 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2544 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2545 'info_dict': {
acf757f4
PH
2546 'title': 'Uploads from Interstellar Movie',
2547 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2548 'uploader': 'Interstellar Movie',
8bdd16b4 2549 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2550 },
481cc733 2551 'playlist_mincount': 21,
8bdd16b4 2552 }, {
2553 # https://github.com/ytdl-org/youtube-dl/issues/21844
2554 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2555 'info_dict': {
2556 'title': 'Data Analysis with Dr Mike Pound',
2557 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2558 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2559 'uploader': 'Computerphile',
deaec5af 2560 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2561 },
2562 'playlist_mincount': 11,
2563 }, {
a0566bbf 2564 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2565 'only_matching': True,
dacb3a86
S
2566 }, {
2567 # Playlist URL that does not actually serve a playlist
2568 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2569 'info_dict': {
2570 'id': 'FqZTN594JQw',
2571 'ext': 'webm',
2572 'title': "Smiley's People 01 detective, Adventure Series, Action",
2573 'uploader': 'STREEM',
2574 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2576 'upload_date': '20150526',
2577 'license': 'Standard YouTube License',
2578 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2579 'categories': ['People & Blogs'],
2580 'tags': list,
dbdaaa23 2581 'view_count': int,
dacb3a86
S
2582 'like_count': int,
2583 'dislike_count': int,
2584 },
2585 'params': {
2586 'skip_download': True,
2587 },
13a75688 2588 'skip': 'This video is not available.',
dacb3a86 2589 'add_ie': [YoutubeIE.ie_key()],
481cc733 2590 }, {
8bdd16b4 2591 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2592 'only_matching': True,
66b48727 2593 }, {
8bdd16b4 2594 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2595 'only_matching': True,
a0566bbf 2596 }, {
2597 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2598 'info_dict': {
2599 'id': '9Auq9mYxFEE',
2600 'ext': 'mp4',
deaec5af 2601 'title': compat_str,
a0566bbf 2602 'uploader': 'Sky News',
2603 'uploader_id': 'skynews',
2604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2605 'upload_date': '20191102',
deaec5af 2606 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2607 'categories': ['News & Politics'],
2608 'tags': list,
2609 'like_count': int,
2610 'dislike_count': int,
2611 },
2612 'params': {
2613 'skip_download': True,
2614 },
2615 }, {
2616 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2617 'info_dict': {
2618 'id': 'a48o2S1cPoo',
2619 'ext': 'mp4',
2620 'title': 'The Young Turks - Live Main Show',
2621 'uploader': 'The Young Turks',
2622 'uploader_id': 'TheYoungTurks',
2623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2624 'upload_date': '20150715',
2625 'license': 'Standard YouTube License',
2626 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2627 'categories': ['News & Politics'],
2628 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2629 'like_count': int,
2630 'dislike_count': int,
2631 },
2632 'params': {
2633 'skip_download': True,
2634 },
2635 'only_matching': True,
2636 }, {
2637 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2638 'only_matching': True,
2639 }, {
2640 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2641 'only_matching': True,
3d3dddc9 2642 }, {
2643 'url': 'https://www.youtube.com/feed/trending',
2644 'only_matching': True,
2645 }, {
2646 # needs auth
2647 'url': 'https://www.youtube.com/feed/library',
2648 'only_matching': True,
2649 }, {
2650 # needs auth
2651 'url': 'https://www.youtube.com/feed/history',
2652 'only_matching': True,
2653 }, {
2654 # needs auth
2655 'url': 'https://www.youtube.com/feed/subscriptions',
2656 'only_matching': True,
2657 }, {
2658 # needs auth
2659 'url': 'https://www.youtube.com/feed/watch_later',
2660 'only_matching': True,
2661 }, {
2662 # no longer available?
2663 'url': 'https://www.youtube.com/feed/recommended',
2664 'only_matching': True,
29f7c58a 2665 }, {
2666 # inline playlist with not always working continuations
2667 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2668 'only_matching': True,
2669 }, {
2670 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2671 'only_matching': True,
2672 }, {
2673 'url': 'https://www.youtube.com/course',
2674 'only_matching': True,
2675 }, {
2676 'url': 'https://www.youtube.com/zsecurity',
2677 'only_matching': True,
2678 }, {
2679 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2680 'only_matching': True,
2681 }, {
2682 'url': 'https://www.youtube.com/TheYoungTurks/live',
2683 'only_matching': True,
39ed931e 2684 }, {
2685 'url': 'https://www.youtube.com/hashtag/cctv9',
2686 'info_dict': {
2687 'id': 'cctv9',
2688 'title': '#cctv9',
2689 },
2690 'playlist_mincount': 350,
29f7c58a 2691 }]
2692
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Anything YoutubeIE accepts is declined here; every other URL is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2697
2698 def _extract_channel_id(self, webpage):
2699 channel_id = self._html_search_meta(
2700 'channelId', webpage, 'channel id', default=None)
2701 if channel_id:
2702 return channel_id
2703 channel_url = self._html_search_meta(
2704 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2705 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2706 'twitter:app:url:googleplay'), webpage, 'channel url')
2707 return self._search_regex(
2708 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2709 channel_url, 'channel id')
15f6397c 2710
8bdd16b4 2711 @staticmethod
cd7c66cf 2712 def _extract_basic_item_renderer(item):
2713 # Modified from _extract_grid_item_renderer
2714 known_renderers = (
e3c07697 2715 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2716 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2717 )
2718 for key, renderer in item.items():
2719 if key not in known_renderers:
2720 continue
2721 return renderer
8bdd16b4 2722
8bdd16b4 2723 def _grid_entries(self, grid_renderer):
2724 for item in grid_renderer['items']:
2725 if not isinstance(item, dict):
39b62db1 2726 continue
cd7c66cf 2727 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2728 if not isinstance(renderer, dict):
2729 continue
2730 title = try_get(
2731 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2732 # playlist
2733 playlist_id = renderer.get('playlistId')
2734 if playlist_id:
2735 yield self.url_result(
2736 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2737 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2738 video_title=title)
2739 # video
2740 video_id = renderer.get('videoId')
2741 if video_id:
2742 yield self._extract_video(renderer)
2743 # channel
2744 channel_id = renderer.get('channelId')
2745 if channel_id:
2746 title = try_get(
2747 renderer, lambda x: x['title']['simpleText'], compat_str)
2748 yield self.url_result(
2749 'https://www.youtube.com/channel/%s' % channel_id,
2750 ie=YoutubeTabIE.ie_key(), video_title=title)
2751
3d3dddc9 2752 def _shelf_entries_from_content(self, shelf_renderer):
2753 content = shelf_renderer.get('content')
2754 if not isinstance(content, dict):
8bdd16b4 2755 return
cd7c66cf 2756 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2757 if renderer:
2758 # TODO: add support for nested playlists so each shelf is processed
2759 # as separate playlist
2760 # TODO: this includes only first N items
2761 for entry in self._grid_entries(renderer):
2762 yield entry
2763 renderer = content.get('horizontalListRenderer')
2764 if renderer:
2765 # TODO
2766 pass
8bdd16b4 2767
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf has an endpoint URL, a url result for it is yielded
    first; the entries found in the shelf content are yielded afterwards
    in every case. With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2785
8bdd16b4 2786 def _playlist_entries(self, video_list_renderer):
2787 for content in video_list_renderer['contents']:
2788 if not isinstance(content, dict):
2789 continue
2790 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2791 if not isinstance(renderer, dict):
2792 continue
2793 video_id = renderer.get('videoId')
2794 if not video_id:
2795 continue
2796 yield self._extract_video(renderer)
07aeced6 2797
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content.videoRenderer``, if any."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
2805
8bdd16b4 2806 def _video_entry(self, video_renderer):
2807 video_id = video_renderer.get('videoId')
2808 if video_id:
2809 return self._extract_video(video_renderer)
dacb3a86 2810
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries for the videos referenced by a community post.

    The video attached to the post (if any) is yielded first, followed by
    a url result for each YouTube video link found in the post text. A
    text link that points at the attached video is skipped, and each url
    result carries the id parsed from its own link.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attached video's id so that an inline link to the very
    # same video is not emitted a second time below
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2839
8bdd16b4 2840 def _post_thread_continuation_entries(self, post_thread_continuation):
2841 contents = post_thread_continuation.get('contents')
2842 if not isinstance(contents, list):
2843 return
2844 for content in contents:
2845 renderer = content.get('backstagePostThreadRenderer')
2846 if not isinstance(renderer, dict):
2847 continue
2848 for entry in self._post_thread_entries(renderer):
2849 yield entry
07aeced6 2850
39ed931e 2851 r''' # unused
2852 def _rich_grid_entries(self, contents):
2853 for content in contents:
2854 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2855 if video_renderer:
2856 entry = self._video_entry(video_renderer)
2857 if entry:
2858 yield entry
2859 '''
2860
29f7c58a 2861 @staticmethod
2862 def _build_continuation_query(continuation, ctp=None):
2863 query = {
2864 'ctoken': continuation,
2865 'continuation': continuation,
2866 }
2867 if ctp:
2868 query['itct'] = ctp
2869 return query
2870
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``continuations[0].nextContinuationData``.

    None is returned when that structure, or its ``continuation`` token,
    is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2882
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first. Failing that, the
    ``contents`` and ``items`` lists are searched (in that order) for the
    first ``continuationItemRenderer`` carrying a continuation token.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        token = endpoint and try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2905
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield every entry of *tab*, following continuation pages.

    tab             -- dict of the selected tab; its ``content`` is read
    item_id         -- used only in the per-page download note
    identity_token  -- sent as ``x-youtube-identity-token`` when truthy
    account_syncid  -- sent as ``X-Goog-PageId`` when truthy

    The entries embedded in the tab are yielded first. Then, for as long
    as a continuation is known, the next page is requested through
    ``_call_api`` (retried up to ``extractor_retries`` times on HTTP
    500/503/404 or incomplete data) and its entries are yielded.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so that extract_entries can publish the continuation
    # it found (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    for page_num in itertools.count(1):
        if not continuation:
            break

        # 'itct' is only present when the continuation came with click
        # tracking params, so it must not be indexed unconditionally
        query = {'continuation': continuation['continuation']}
        ctp = continuation.get('itct')
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        response = None  # stays None if the retry loop never runs
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # Older response layout: entries live under continuationContents
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset before extracting: extract_entries writes into this list
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Newer response layout: entries are appended continuation items.
        # Each value pairs the handler with the key it expects the items under.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3060
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab is selected.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 3068
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict holding any of ``uploader``, ``uploader_id`` and
    ``uploader_url`` for which a value was found; keys whose value is
    None are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        owner_id = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        owner_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader'] = owner.get('text')
        found['uploader_id'] = owner_id
        found['uploader_url'] = urljoin('https://www.youtube.com/', owner_path)
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3091
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present, else
    from ``playlistMetadataRenderer``. The playlist id falls back to
    *item_id*, the title to the hashtag header text or the playlist id,
    and the selected tab's title is appended to the title. Without a
    channel id the uploader fields are taken from the playlist sidebar.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    thumbnail_candidates = []

    selected_tab = self._extract_selected_tab(tabs)

    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        # Channel avatar first, playlist thumbnail from the sidebar second
        thumbnail_candidates = (
            try_get(meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for candidate in thumbnail_candidates:
        candidate_url = (
            url_or_none(candidate.get('url'))
            if isinstance(candidate, dict) else None)
        if candidate_url:
            thumbnails.append({
                'url': candidate_url,
                'width': int_or_none(candidate.get('width')),
                'height': int_or_none(candidate.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror whatever uploader values ended up set
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3162
cd7c66cf 3163 def _extract_mix_playlist(self, playlist, playlist_id):
2be71994 3164 first_id = last_id = None
3165 for page_num in itertools.count(1):
cd7c66cf 3166 videos = list(self._playlist_entries(playlist))
3167 if not videos:
3168 return
2be71994 3169 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3170 if start >= len(videos):
3171 return
3172 for video in videos[start:]:
3173 if video['id'] == first_id:
3174 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3175 return
3176 yield video
3177 first_id = first_id or videos[0]['id']
3178 last_id = videos[-1]['id']
cd7c66cf 3179
cd7c66cf 3180 _, data = self._extract_webpage(
2be71994 3181 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
cd7c66cf 3182 '%s page %d' % (playlist_id, page_num))
3183 playlist = try_get(
3184 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3185
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return the result for an inline (watch page) playlist.

    When the playlist has its own URL that differs from *url*, a url
    result pointing at it is returned; otherwise the playlist is treated
    as a mix and its entries are extracted here.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 3203
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found under the 'alerts' key of `data`.

    Alerts whose type is not 'error' (case-insensitive) are reported as
    warnings. When error alerts are present, all but the last one are
    reported as warnings and the last one is raised as an ExtractorError,
    with `expected` passed through to it.
    """

    def _real_extract_alerts():
        # Yield (alert_type, message) pairs; entries with an unexpected
        # shape are skipped instead of failing the whole extraction
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Same guard as for alert_dict: the values are not guaranteed to be dicts
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if message:
                    yield alert_type, message
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    message = try_get(run, lambda x: x['text'], compat_str)
                    if message:
                        yield alert_type, message

    err_msg = None
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            if err_msg:
                # Only the last error is raised; earlier ones are demoted to warnings
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
02ced43c 3233
cd7c66cf 3234 def _extract_webpage(self, url, item_id):
62bff2c1 3235 retries = self._downloader.params.get('extractor_retries', 3)
3236 count = -1
c705177d 3237 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3238 while count < retries:
62bff2c1 3239 count += 1
14fdfea9 3240 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3241 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3242 if count:
c705177d 3243 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3244 webpage = self._download_webpage(
3245 url, item_id,
cd7c66cf 3246 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3247 data = self._extract_yt_initial_data(item_id, webpage)
f3eaa8dd 3248 self._extract_alerts(data, expected=True)
14fdfea9 3249 if data.get('contents') or data.get('currentVideoEndpoint'):
3250 break
c705177d 3251 if count >= retries:
3252 self._downloader.report_error(last_error)
cd7c66cf 3253 return webpage, data
3254
def _real_extract(self, url):
    """Resolve a tab, playlist or watch URL to a result.

    The URL is moved to the www.youtube.com host, adjusted when it looks
    like a bare channel page or a watch URL without a video ID, and then
    downloaded. The initial data decides whether the result comes from the
    browse tabs, from a watch-page playlist, or from a single video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = {} if not match else match.groupdict()
    # NOTE(review): 'not_channel' is presumably a named group of _VALID_URL - confirm
    if groups and not groups.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    is_watch_page = (groups.get('not_channel') or '').startswith('watch')
    if is_watch_page and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Prefer the video the page reports as current over the one from the URL
    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3308
c5e8d7af 3309
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist IDs and playlist URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs that YoutubeTabIE accepts are left to it
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Keep the original query string; a bare playlist ID has none,
        # so build one from the ID
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not query:
            query = {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3384
3385
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be links that carry a playlist into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', watch_query),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3424
3425
class YoutubeYtUserIE(InfoExtractor):
    """Expand the "ytuser:<name>" shorthand to the user's page URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3439
b05654f0 3440
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ":ytfav" keyword to the "LL" playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3458
3459
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor for the "ytsearch" keyword, paging through the search API."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, one API page at a time."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=request)
            if not response:
                return
            # The first page and the continuation pages keep their results in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    if not isinstance(item, dict):
                        continue
                    renderer = item.get('videoRenderer')
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 3528
c9ae7b95 3529
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3535
c9ae7b95 3536
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run the search described by a youtube.com/results URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern above as is
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The URL's "sp" value becomes the search params of this instance
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3562
3563
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    It is used for both IE_NAME and the feed URL handed to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3583
3584
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" keyword to the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3597
3598
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytrec" keyword and the bare youtube.com URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3613
1ed5b5c9 3614
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytsubs" keyword."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3626
1ed5b5c9 3627
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ythis" keyword."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3636
3637
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video ID and explain the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; always fail with a hint
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3685
3686
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose video ID has 1 to 10 characters and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)