]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[downloaders] Fix API access
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
c5e8d7af 32 ExtractorError,
b60419c5 33 format_field,
2d30521a 34 float_or_none,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
6310acf5 37 parse_codecs,
7c80519c 38 parse_duration,
dca3ff4a 39 qualities,
3995d37d 40 remove_start,
cf7e015f 41 smuggle_url,
dbdaaa23 42 str_or_none,
c93d53f5 43 str_to_int,
556dbe7f 44 try_get,
c5e8d7af
PH
45 unescapeHTML,
46 unified_strdate,
cf7e015f 47 unsmuggle_url,
8bdd16b4 48 update_url_query,
21c340b8 49 url_or_none,
6e6bc8da 50 urlencode_postdata,
d92f5d5a 51 urljoin
c5e8d7af
PH
52)
53
5f6a1245 54
de7f3446 55class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
56 """Provide base functions for Youtube extractors"""
# Login page; _login downloads it to collect the hidden form inputs
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# Not referenced by the code visible in this class
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoints POSTed to by _login: account lookup, password challenge and
# two-factor code submission ({0} is filled with the TL value taken from
# the challenge response)
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names
# NOTE(review): presumably first path components that must not be treated
# as channel/user names - confirm against the extractors that use it
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# NOTE(review): presumably the machine name used for .netrc credential lookup - confirm
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex matching playlist IDs; YoutubeIE._VALID_URL interpolates it to
# reject video URLs that carry such a list= parameter
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 74
25f14e9f
S
75 def _ids_to_results(self, ids):
76 return [
77 self.url_result(vid_id, 'Youtube', video_id=vid_id)
78 for vid_id in ids]
79
def _login(self):
    """
    Attempt to log in to YouTube.

    True is returned if successful or skipped (no credentials supplied).
    False is returned if any step of the login flow failed; a warning
    describing the failure is reported through the downloader.

    If _LOGIN_REQUIRED is set and no authentication was provided
    (neither credentials nor a cookie file), an ExtractorError is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (not a bare return) to honour the documented contract
        return False

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        """POST the login form plus the JSON-encoded f_req; returns parsed JSON or False."""
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything in front of the first '[' so the rest parses as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    # Sign-in flow descriptor sent unchanged with both the lookup and the
    # challenge request; it is only serialized, never mutated
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'
    flow_context = [
        None, None,
        [2, 1, None, 1, service_login_url, None, [], 4],
        1, [None, None, []], None, None, None, True
    ]

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        flow_context,
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        flow_context,
    ]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        # Explicit False (not a bare return) to honour the documented contract
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesized: '%' binds tighter than the conditional expression,
        # so without the parentheses the prefix is lost for every other message
        warn('Unable to login: %s' % (
            'Invalid password'
            if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password challenge above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code'
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login counts as successful only if the response mentions the account page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
264
cce889b9 265 def _initialize_consent(self):
266 cookies = self._get_cookies('https://www.youtube.com/')
267 if cookies.get('__Secure-3PSID'):
268 return
269 consent_id = None
270 consent = cookies.get('CONSENT')
271 if consent:
272 if 'YES' in consent.value:
273 return
274 consent_id = self._search_regex(
275 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
276 if not consent_id:
277 consent_id = random.randint(100, 999)
278 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 279
b2e8bc1b 280 def _real_initialize(self):
cce889b9 281 self._initialize_consent()
b2e8bc1b
JMF
282 if self._downloader is None:
283 return
b2e8bc1b
JMF
284 if not self._login():
285 return
c5e8d7af 286
# Web client version, reused in the API request body and in the basic API headers below
_YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
# Base JSON payload for API requests; _call_api takes a shallow copy and
# merges the per-call query into it
_DEFAULT_API_DATA = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': _YT_WEB_CLIENT_VERSION,
        }
    },
}

# Client identification headers; not referenced by the code visible in this class
_DEFAULT_BASIC_API_HEADERS = {
    'X-YouTube-Client-Name': '1',
    'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
}

# Captures the JSON object assigned to ytInitialData (either as a plain
# name or as window["ytInitialData"])
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; _extract_yt_initial_data appends
# it to the data pattern so the lazy {.+?} does not stop at an earlier "};"
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 305
a5c56234
M
306 def _generate_sapisidhash_header(self):
307 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
308 if sapisid_cookie is None:
309 return
310 time_now = round(time.time())
311 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
312 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
313
314 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
315 note='Downloading API JSON', errnote='Unable to download API page'):
8bdd16b4 316 data = self._DEFAULT_API_DATA.copy()
317 data.update(query)
a5c56234
M
318 headers = headers or {}
319 headers.update({'content-type': 'application/json'})
320 auth = self._generate_sapisidhash_header()
321 if auth is not None:
322 headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
545cc85d 323 return self._download_json(
a5c56234
M
324 'https://www.youtube.com/youtubei/v1/%s' % ep,
325 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
326 data=json.dumps(data).encode('utf8'), headers=headers,
8bdd16b4 327 query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 328
8bdd16b4 329 def _extract_yt_initial_data(self, video_id, webpage):
330 return self._parse_json(
331 self._search_regex(
29f7c58a 332 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 333 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 334 video_id)
0c148415 335
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN string for the page, or None when it cannot be found.

    The ytcfg object extracted from the page is consulted first; if it has
    no non-empty string ID_TOKEN, the raw page is searched with a regex.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    cfg_token = None
    if ytcfg:
        cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
345
@staticmethod
def _extract_account_syncid(data):
    """Extract syncId required to download private playlists of secondary channels"""
    datasync_id = try_get(
        data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
    parts = (datasync_id or '').split("||")
    # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
    # and just "user_syncid||" for primary channel. We only want the channel_syncid
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[0]
356
29f7c58a 357 def _extract_ytcfg(self, video_id, webpage):
358 return self._parse_json(
359 self._search_regex(
360 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
361 default='{}'), video_id, fatal=False)
362
def _extract_video(self, renderer):
    """Convert a video renderer dict into a 'url'-type result delegated to YoutubeIE.

    Title, description, duration, view count and uploader are read from the
    renderer on a best-effort basis; fields that are missing come out as None.
    """
    def first_text(*getters):
        # Value of the first getter that yields a string, else None
        return try_get(renderer, getters, compat_str)

    video_id = renderer.get('videoId')

    length_text = first_text(lambda x: x['lengthText']['simpleText'])
    view_count_text = first_text(lambda x: x['viewCountText']['simpleText']) or ''
    # Whitespace is stripped first so only the leading digits/commas are parsed
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': first_text(
            lambda x: x['title']['runs'][0]['text'],
            lambda x: x['title']['simpleText']),
        'description': first_text(
            lambda x: x['descriptionSnippet']['runs'][0]['text']),
        'duration': parse_duration(length_text),
        'view_count': str_to_int(leading_number),
        'uploader': first_text(
            lambda x: x['ownerText']['runs'][0]['text'],
            lambda x: x['shortBylineText']['runs'][0]['text']),
    }
394
0c148415 395
360e1ca5 396class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 397 IE_DESC = 'YouTube.com'
bc2ca1bb 398 _INVIDIOUS_SITES = (
399 # invidious-redirect websites
400 r'(?:www\.)?redirect\.invidious\.io',
401 r'(?:(?:www|dev)\.)?invidio\.us',
402 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
403 r'(?:www\.)?invidious\.pussthecat\.org',
404 r'(?:www\.)?invidious\.048596\.xyz',
405 r'(?:www\.)?invidious\.zee\.li',
406 r'(?:www\.)?vid\.puffyan\.us',
407 r'(?:(?:www|au)\.)?ytprivate\.com',
408 r'(?:www\.)?invidious\.namazso\.eu',
409 r'(?:www\.)?invidious\.ethibox\.fr',
410 r'(?:www\.)?inv\.skyn3t\.in',
411 r'(?:www\.)?invidious\.himiko\.cloud',
412 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
413 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
414 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
415 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
416 # youtube-dl invidious instances list
417 r'(?:(?:www|no)\.)?invidiou\.sh',
418 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
419 r'(?:www\.)?invidious\.kabi\.tk',
420 r'(?:www\.)?invidious\.13ad\.de',
421 r'(?:www\.)?invidious\.mastodon\.host',
422 r'(?:www\.)?invidious\.zapashcanon\.fr',
423 r'(?:www\.)?invidious\.kavin\.rocks',
424 r'(?:www\.)?invidious\.tube',
425 r'(?:www\.)?invidiou\.site',
426 r'(?:www\.)?invidious\.site',
427 r'(?:www\.)?invidious\.xyz',
428 r'(?:www\.)?invidious\.nixnet\.xyz',
429 r'(?:www\.)?invidious\.drycat\.fr',
430 r'(?:www\.)?tube\.poal\.co',
431 r'(?:www\.)?tube\.connect\.cafe',
432 r'(?:www\.)?vid\.wxzm\.sx',
433 r'(?:www\.)?vid\.mint\.lgbt',
434 r'(?:www\.)?yewtu\.be',
435 r'(?:www\.)?yt\.elukerio\.org',
436 r'(?:www\.)?yt\.lelux\.fi',
437 r'(?:www\.)?invidious\.ggc-project\.de',
438 r'(?:www\.)?yt\.maisputain\.ovh',
439 r'(?:www\.)?invidious\.toot\.koeln',
440 r'(?:www\.)?invidious\.fdn\.fr',
441 r'(?:www\.)?watch\.nettohikari\.com',
442 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
443 r'(?:www\.)?qklhadlycap4cnod\.onion',
444 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
445 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
446 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
447 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
448 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
449 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
450 )
cb7dfeea 451 _VALID_URL = r"""(?x)^
c5e8d7af 452 (
edb53e2d 453 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 454 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
455 (?:www\.)?deturl\.com/www\.youtube\.com|
456 (?:www\.)?pwnyoutube\.com|
457 (?:www\.)?hooktube\.com|
458 (?:www\.)?yourepeat\.com|
459 tube\.majestyc\.net|
460 %(invidious)s|
461 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
462 (?:.*?\#/)? # handle anchor (#/) redirect urls
463 (?: # the various things that can precede the ID:
ac7553d0 464 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 465 |(?: # or the v= param in all its forms
f7000f3a 466 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 467 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 468 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
469 v=
470 )
f4b05232 471 ))
cbaed4bb
S
472 |(?:
473 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
474 vid\.plus| # or vid.plus/xxxx
475 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 476 %(invidious)s
cbaed4bb 477 )/
edb53e2d 478 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 479 )
c5e8d7af 480 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 481 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
482 (?!.*?\blist=
483 (?:
484 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
485 WL # WL are handled by the watch later IE
486 )
487 )
c5e8d7af 488 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 489 $""" % {
490 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
491 'invidious': '|'.join(_INVIDIOUS_SITES),
492 }
e40c758c 493 _PLAYER_INFO_RE = (
cc2db878 494 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
495 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 496 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 497 )
2c62dc26 498 _formats = {
c2d3cb4c 499 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
500 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
501 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
502 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
503 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
504 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
505 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
506 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 507 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 508 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
509 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
510 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
511 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
512 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
513 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 514 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 515 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
516 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 517
518
519 # 3D videos
c2d3cb4c 520 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
521 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
522 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
523 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 524 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
525 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
526 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 527
96fb5605 528 # Apple HTTP Live Streaming
11f12195 529 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 530 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
531 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
532 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
533 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
534 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 535 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
536 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
537
538 # DASH mp4 video
d23028a8
S
539 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
540 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
541 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
542 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
543 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 544 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
545 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
546 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
547 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
548 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
549 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
550 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 551
f6f1fc92 552 # Dash mp4 audio
d23028a8
S
553 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
554 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
555 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
556 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
557 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
558 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
559 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
560
561 # Dash webm
d23028a8
S
562 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
563 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
564 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
565 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
566 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
567 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
568 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
569 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
570 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
571 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
572 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
573 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
574 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
575 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
576 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 577 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
578 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
579 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
580 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
581 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
582 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
583 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
584
585 # Dash webm audio
d23028a8
S
586 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
587 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 588
0857baad 589 # Dash webm audio with opus inside
d23028a8
S
590 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
591 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
592 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 593
ce6b9a2d
PH
594 # RTMP (unnamed)
595 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
596
597 # av01 video only formats sometimes served with "unknown" codecs
598 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
599 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
600 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
601 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 602 }
29f7c58a 603 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 604
fd5c4aab
S
605 _GEO_BYPASS = False
606
78caa52a 607 IE_NAME = 'youtube'
2eb88d95
PH
608 _TESTS = [
609 {
2d3d2997 610 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
611 'info_dict': {
612 'id': 'BaW_jenozKc',
613 'ext': 'mp4',
3867038a 614 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
615 'uploader': 'Philipp Hagemeister',
616 'uploader_id': 'phihag',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
618 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
619 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 620 'upload_date': '20121002',
3867038a 621 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 622 'categories': ['Science & Technology'],
3867038a 623 'tags': ['youtube-dl'],
556dbe7f 624 'duration': 10,
dbdaaa23 625 'view_count': int,
3e7c1224
PH
626 'like_count': int,
627 'dislike_count': int,
7c80519c 628 'start_time': 1,
297a564b 629 'end_time': 9,
2eb88d95 630 }
0e853ca4 631 },
fccd3771 632 {
4bc3a23e
PH
633 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
634 'note': 'Embed-only video (#1746)',
635 'info_dict': {
636 'id': 'yZIXLfi8CZQ',
637 'ext': 'mp4',
638 'upload_date': '20120608',
639 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
640 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
641 'uploader': 'SET India',
94bfcd23 642 'uploader_id': 'setindia',
ec85ded8 643 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 644 'age_limit': 18,
545cc85d 645 },
646 'skip': 'Private video',
fccd3771 647 },
11b56058 648 {
8bdd16b4 649 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
650 'note': 'Use the first video ID in the URL',
651 'info_dict': {
652 'id': 'BaW_jenozKc',
653 'ext': 'mp4',
3867038a 654 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
655 'uploader': 'Philipp Hagemeister',
656 'uploader_id': 'phihag',
ec85ded8 657 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
11b56058
PM
664 'like_count': int,
665 'dislike_count': int,
34a7de29
S
666 },
667 'params': {
668 'skip_download': True,
669 },
11b56058 670 },
dd27fd17 671 {
2d3d2997 672 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
673 'note': '256k DASH audio (format 141) via DASH manifest',
674 'info_dict': {
675 'id': 'a9LDPn-MO4I',
676 'ext': 'm4a',
677 'upload_date': '20121002',
678 'uploader_id': '8KVIDEO',
ec85ded8 679 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
680 'description': '',
681 'uploader': '8KVIDEO',
682 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 683 },
4bc3a23e
PH
684 'params': {
685 'youtube_include_dash_manifest': True,
686 'format': '141',
4919603f 687 },
de3c7fe0 688 'skip': 'format 141 not served anymore',
dd27fd17 689 },
8bdd16b4 690 # DASH manifest with encrypted signature
691 {
692 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
693 'info_dict': {
694 'id': 'IB3lcPjvWLA',
695 'ext': 'm4a',
696 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
697 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
698 'duration': 244,
699 'uploader': 'AfrojackVEVO',
700 'uploader_id': 'AfrojackVEVO',
701 'upload_date': '20131011',
cc2db878 702 'abr': 129.495,
8bdd16b4 703 },
704 'params': {
705 'youtube_include_dash_manifest': True,
706 'format': '141/bestaudio[ext=m4a]',
707 },
708 },
aa79ac0c
PH
709 # Controversy video
710 {
711 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
712 'info_dict': {
713 'id': 'T4XJQO3qol8',
714 'ext': 'mp4',
556dbe7f 715 'duration': 219,
aa79ac0c 716 'upload_date': '20100909',
4fe54c12 717 'uploader': 'Amazing Atheist',
aa79ac0c 718 'uploader_id': 'TheAmazingAtheist',
ec85ded8 719 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 720 'title': 'Burning Everyone\'s Koran',
545cc85d 721 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 722 }
c522adb1 723 },
dd2d55f1 724 # Normal age-gate video (embed allowed)
c522adb1 725 {
2d3d2997 726 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
727 'info_dict': {
728 'id': 'HtVdAasjOgU',
729 'ext': 'mp4',
730 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 731 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 732 'duration': 142,
c522adb1
JMF
733 'uploader': 'The Witcher',
734 'uploader_id': 'WitcherGame',
ec85ded8 735 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 736 'upload_date': '20140605',
34952f09 737 'age_limit': 18,
c522adb1
JMF
738 },
739 },
8bdd16b4 740 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
741 # YouTube Red ad is not captured for creator
742 {
743 'url': '__2ABJjxzNo',
744 'info_dict': {
745 'id': '__2ABJjxzNo',
746 'ext': 'mp4',
747 'duration': 266,
748 'upload_date': '20100430',
749 'uploader_id': 'deadmau5',
750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 751 'creator': 'deadmau5',
752 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 753 'uploader': 'deadmau5',
754 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 755 'alt_title': 'Some Chords',
8bdd16b4 756 },
757 'expected_warnings': [
758 'DASH manifest missing',
759 ]
760 },
067aa17e 761 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
762 {
763 'url': 'lqQg6PlCWgI',
764 'info_dict': {
765 'id': 'lqQg6PlCWgI',
766 'ext': 'mp4',
556dbe7f 767 'duration': 6085,
90227264 768 'upload_date': '20150827',
cbe2bd91 769 'uploader_id': 'olympic',
ec85ded8 770 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 771 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 772 'uploader': 'Olympic',
cbe2bd91
PH
773 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
774 },
775 'params': {
776 'skip_download': 'requires avconv',
e52a40ab 777 }
cbe2bd91 778 },
6271f1ca
PH
779 # Non-square pixels
780 {
781 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
782 'info_dict': {
783 'id': '_b-2C3KPAM0',
784 'ext': 'mp4',
785 'stretched_ratio': 16 / 9.,
556dbe7f 786 'duration': 85,
6271f1ca
PH
787 'upload_date': '20110310',
788 'uploader_id': 'AllenMeow',
ec85ded8 789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 790 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 791 'uploader': '孫ᄋᄅ',
6271f1ca
PH
792 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
793 },
06b491eb
S
794 },
795 # url_encoded_fmt_stream_map is empty string
796 {
797 'url': 'qEJwOuvDf7I',
798 'info_dict': {
799 'id': 'qEJwOuvDf7I',
f57b7835 800 'ext': 'webm',
06b491eb
S
801 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
802 'description': '',
803 'upload_date': '20150404',
804 'uploader_id': 'spbelect',
805 'uploader': 'Наблюдатели Петербурга',
806 },
807 'params': {
808 'skip_download': 'requires avconv',
e323cf3f
S
809 },
810 'skip': 'This live event has ended.',
06b491eb 811 },
067aa17e 812 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
813 {
814 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
815 'info_dict': {
816 'id': 'FIl7x6_3R5Y',
eb6793ba 817 'ext': 'webm',
da77d856
S
818 'title': 'md5:7b81415841e02ecd4313668cde88737a',
819 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 820 'duration': 220,
da77d856
S
821 'upload_date': '20150625',
822 'uploader_id': 'dorappi2000',
ec85ded8 823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 824 'uploader': 'dorappi2000',
eb6793ba 825 'formats': 'mincount:31',
da77d856 826 },
eb6793ba 827 'skip': 'not actual anymore',
2ee8f5d8 828 },
8a1a26ce
YCH
829 # DASH manifest with segment_list
830 {
831 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
832 'md5': '8ce563a1d667b599d21064e982ab9e31',
833 'info_dict': {
834 'id': 'CsmdDsKjzN8',
835 'ext': 'mp4',
17ee98e1 836 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
837 'uploader': 'Airtek',
838 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
839 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
840 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
841 },
842 'params': {
843 'youtube_include_dash_manifest': True,
844 'format': '135', # bestvideo
be49068d
S
845 },
846 'skip': 'This live event has ended.',
2ee8f5d8 847 },
cf7e015f
S
848 {
849 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 850 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 851 'info_dict': {
545cc85d 852 'id': 'jvGDaLqkpTg',
853 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
854 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
855 },
856 'playlist': [{
857 'info_dict': {
545cc85d 858 'id': 'jvGDaLqkpTg',
cf7e015f 859 'ext': 'mp4',
545cc85d 860 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
861 'description': 'md5:e03b909557865076822aa169218d6a5d',
862 'duration': 10643,
863 'upload_date': '20161111',
864 'uploader': 'Team PGP',
865 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
866 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
867 },
868 }, {
869 'info_dict': {
545cc85d 870 'id': '3AKt1R1aDnw',
cf7e015f 871 'ext': 'mp4',
545cc85d 872 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
873 'description': 'md5:e03b909557865076822aa169218d6a5d',
874 'duration': 10991,
875 'upload_date': '20161111',
876 'uploader': 'Team PGP',
877 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
878 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
879 },
880 }, {
881 'info_dict': {
545cc85d 882 'id': 'RtAMM00gpVc',
cf7e015f 883 'ext': 'mp4',
545cc85d 884 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
885 'description': 'md5:e03b909557865076822aa169218d6a5d',
886 'duration': 10995,
887 'upload_date': '20161111',
888 'uploader': 'Team PGP',
889 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
891 },
892 }, {
893 'info_dict': {
545cc85d 894 'id': '6N2fdlP3C5U',
cf7e015f 895 'ext': 'mp4',
545cc85d 896 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
897 'description': 'md5:e03b909557865076822aa169218d6a5d',
898 'duration': 10990,
899 'upload_date': '20161111',
900 'uploader': 'Team PGP',
901 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
903 },
904 }],
905 'params': {
906 'skip_download': True,
907 },
cbaed4bb 908 },
f9f49d87 909 {
067aa17e 910 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
911 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
912 'info_dict': {
913 'id': 'gVfLd0zydlo',
914 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
915 },
916 'playlist_count': 2,
be49068d 917 'skip': 'Not multifeed anymore',
f9f49d87 918 },
cbaed4bb 919 {
2d3d2997 920 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 921 'only_matching': True,
0e49d9a6 922 },
6d4fc66b 923 {
2d3d2997 924 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
925 'only_matching': True,
926 },
0e49d9a6 927 {
067aa17e 928 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 929 # Also tests cut-off URL expansion in video description (see
067aa17e
S
930 # https://github.com/ytdl-org/youtube-dl/issues/1892,
931 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
932 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
933 'info_dict': {
934 'id': 'lsguqyKfVQg',
935 'ext': 'mp4',
936 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 937 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 938 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 939 'duration': 133,
0e49d9a6
LL
940 'upload_date': '20151119',
941 'uploader_id': 'IronSoulElf',
ec85ded8 942 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 943 'uploader': 'IronSoulElf',
eb6793ba
S
944 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
945 'track': 'Dark Walk - Position Music',
946 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 947 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
948 },
949 'params': {
950 'skip_download': True,
951 },
952 },
61f92af1 953 {
067aa17e 954 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
955 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
956 'only_matching': True,
957 },
313dfc45
LL
958 {
959 # Video with yt:stretch=17:0
960 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
961 'info_dict': {
962 'id': 'Q39EVAstoRM',
963 'ext': 'mp4',
964 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
965 'description': 'md5:ee18a25c350637c8faff806845bddee9',
966 'upload_date': '20151107',
967 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
968 'uploader': 'CH GAMER DROID',
969 },
970 'params': {
971 'skip_download': True,
972 },
be49068d 973 'skip': 'This video does not exist.',
313dfc45 974 },
7caf9830
S
975 {
976 # Video licensed under Creative Commons
977 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
978 'info_dict': {
979 'id': 'M4gD1WSo5mA',
980 'ext': 'mp4',
981 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
982 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 983 'duration': 721,
7caf9830
S
984 'upload_date': '20150127',
985 'uploader_id': 'BerkmanCenter',
ec85ded8 986 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 987 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
988 'license': 'Creative Commons Attribution license (reuse allowed)',
989 },
990 'params': {
991 'skip_download': True,
992 },
993 },
fd050249
S
994 {
995 # Channel-like uploader_url
996 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
997 'info_dict': {
998 'id': 'eQcmzGIKrzg',
999 'ext': 'mp4',
1000 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1001 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1002 'duration': 4060,
fd050249 1003 'upload_date': '20151119',
eb6793ba 1004 'uploader': 'Bernie Sanders',
fd050249 1005 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1006 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1007 'license': 'Creative Commons Attribution license (reuse allowed)',
1008 },
1009 'params': {
1010 'skip_download': True,
1011 },
1012 },
040ac686
S
1013 {
1014 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1015 'only_matching': True,
7f29cf54
S
1016 },
1017 {
067aa17e 1018 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1019 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1020 'only_matching': True,
6496ccb4
S
1021 },
1022 {
1023 # Rental video preview
1024 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1025 'info_dict': {
1026 'id': 'uGpuVWrhIzE',
1027 'ext': 'mp4',
1028 'title': 'Piku - Trailer',
1029 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1030 'upload_date': '20150811',
1031 'uploader': 'FlixMatrix',
1032 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1033 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1034 'license': 'Standard YouTube License',
1035 },
1036 'params': {
1037 'skip_download': True,
1038 },
eb6793ba 1039 'skip': 'This video is not available.',
022a5d66 1040 },
12afdc2a
S
1041 {
1042 # YouTube Red video with episode data
1043 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1044 'info_dict': {
1045 'id': 'iqKdEhx-dD4',
1046 'ext': 'mp4',
1047 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1048 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1049 'duration': 2085,
12afdc2a
S
1050 'upload_date': '20170118',
1051 'uploader': 'Vsauce',
1052 'uploader_id': 'Vsauce',
1053 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1054 'series': 'Mind Field',
1055 'season_number': 1,
1056 'episode_number': 1,
1057 },
1058 'params': {
1059 'skip_download': True,
1060 },
1061 'expected_warnings': [
1062 'Skipping DASH manifest',
1063 ],
1064 },
c7121fa7
S
1065 {
1066 # The following content has been identified by the YouTube community
1067 # as inappropriate or offensive to some audiences.
1068 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1069 'info_dict': {
1070 'id': '6SJNVb0GnPI',
1071 'ext': 'mp4',
1072 'title': 'Race Differences in Intelligence',
1073 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1074 'duration': 965,
1075 'upload_date': '20140124',
1076 'uploader': 'New Century Foundation',
1077 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1078 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1079 },
1080 'params': {
1081 'skip_download': True,
1082 },
545cc85d 1083 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1084 },
022a5d66
S
1085 {
1086 # itag 212
1087 'url': '1t24XAntNCY',
1088 'only_matching': True,
fd5c4aab
S
1089 },
1090 {
1091 # geo restricted to JP
1092 'url': 'sJL6WA-aGkQ',
1093 'only_matching': True,
1094 },
cd5a74a2
S
1095 {
1096 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1097 'only_matching': True,
1098 },
bc2ca1bb 1099 {
1100 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1101 'only_matching': True,
1102 },
1103 {
1104 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1105 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1106 'only_matching': True,
1107 },
825cd268
RA
1108 {
1109 # DRM protected
1110 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1111 'only_matching': True,
4fe54c12
S
1112 },
1113 {
1114 # Video with unsupported adaptive stream type formats
1115 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1116 'info_dict': {
1117 'id': 'Z4Vy8R84T1U',
1118 'ext': 'mp4',
1119 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1120 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1121 'duration': 433,
1122 'upload_date': '20130923',
1123 'uploader': 'Amelia Putri Harwita',
1124 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1125 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1126 'formats': 'maxcount:10',
1127 },
1128 'params': {
1129 'skip_download': True,
1130 'youtube_include_dash_manifest': False,
1131 },
5429d6a9 1132 'skip': 'not actual anymore',
5caabd3c 1133 },
1134 {
822b9d9c 1135 # Youtube Music Auto-generated description
5caabd3c 1136 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1137 'info_dict': {
1138 'id': 'MgNrAu2pzNs',
1139 'ext': 'mp4',
1140 'title': 'Voyeur Girl',
1141 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1142 'upload_date': '20190312',
5429d6a9
S
1143 'uploader': 'Stephen - Topic',
1144 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1145 'artist': 'Stephen',
1146 'track': 'Voyeur Girl',
1147 'album': 'it\'s too much love to know my dear',
1148 'release_date': '20190313',
1149 'release_year': 2019,
1150 },
1151 'params': {
1152 'skip_download': True,
1153 },
1154 },
66b48727
RA
1155 {
1156 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1157 'only_matching': True,
1158 },
011e75e6
S
1159 {
1160 # invalid -> valid video id redirection
1161 'url': 'DJztXj2GPfl',
1162 'info_dict': {
1163 'id': 'DJztXj2GPfk',
1164 'ext': 'mp4',
1165 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1166 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1167 'upload_date': '20090125',
1168 'uploader': 'Prochorowka',
1169 'uploader_id': 'Prochorowka',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1171 'artist': 'Panjabi MC',
1172 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1173 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1174 },
1175 'params': {
1176 'skip_download': True,
1177 },
545cc85d 1178 'skip': 'Video unavailable',
ea74e00b
DP
1179 },
1180 {
1181 # empty description results in an empty string
1182 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1183 'info_dict': {
1184 'id': 'x41yOUIvK2k',
1185 'ext': 'mp4',
1186 'title': 'IMG 3456',
1187 'description': '',
1188 'upload_date': '20170613',
1189 'uploader_id': 'ElevageOrVert',
1190 'uploader': 'ElevageOrVert',
1191 },
1192 'params': {
1193 'skip_download': True,
1194 },
1195 },
a0566bbf 1196 {
29f7c58a 1197 # with '};' inside yt initial data (see [1])
1198 # see [2] for an example with '};' inside ytInitialPlayerResponse
1199 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1200 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1201 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1202 'info_dict': {
1203 'id': 'CHqg6qOn4no',
1204 'ext': 'mp4',
1205 'title': 'Part 77 Sort a list of simple types in c#',
1206 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1207 'upload_date': '20130831',
1208 'uploader_id': 'kudvenkat',
1209 'uploader': 'kudvenkat',
1210 },
1211 'params': {
1212 'skip_download': True,
1213 },
1214 },
29f7c58a 1215 {
1216 # another example of '};' in ytInitialData
1217 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1218 'only_matching': True,
1219 },
1220 {
1221 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1222 'only_matching': True,
1223 },
545cc85d 1224 {
cc2db878 1225 # https://github.com/ytdl-org/youtube-dl/pull/28094
1226 'url': 'OtqTfy26tG0',
1227 'info_dict': {
1228 'id': 'OtqTfy26tG0',
1229 'ext': 'mp4',
1230 'title': 'Burn Out',
1231 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1232 'upload_date': '20141120',
1233 'uploader': 'The Cinematic Orchestra - Topic',
1234 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1235 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1236 'artist': 'The Cinematic Orchestra',
1237 'track': 'Burn Out',
1238 'album': 'Every Day',
1239 'release_data': None,
1240 'release_year': None,
1241 },
1242 'params': {
1243 'skip_download': True,
1244 },
545cc85d 1245 },
bc2ca1bb 1246 {
1247 # controversial video, only works with bpctr when authenticated with cookies
1248 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1249 'only_matching': True,
1250 },
2eb88d95
PH
1251 ]
1252
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature layout id).
    self._player_cache = {}
    # Downloaded player source code, keyed by player id.
    self._code_cache = {}
e0df6211 1257
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    The lengths are joined with dots, e.g. 'abc.de' gives '3.2'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1261
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured from *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next((m for m in attempts if m), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
e40c758c
S
1271
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like *example_sig*.

    The downloader cache ('youtube-sigfuncs') is consulted first; a cached
    entry is a list of source indices and is turned directly into a
    function.  Otherwise the player code is downloaded (once per player id
    for this instance), the signature routine is parsed out of it, and the
    index list it produces for a probe string is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    sig_layout = self._signature_cache_id(example_sig)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, sig_layout)
    # The cache key must be a bare name without any directory component.
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if player_id in self._code_cache:
        code = self._code_cache[player_id]
    else:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(code)

    # Feed a string whose characters encode their own positions, so the
    # output spells out which input index ends up at each output position.
    probe = ''.join(compat_chr(pos) for pos in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1298
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run on a probe string whose characters encode their own
    positions; the resulting index list is rendered as a sum of index and
    slice expressions on ``s`` and written out with ``self.to_screen``,
    guarded by a check on the part lengths of *example_sig*.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expression strings covering idxs in order,
        # collapsing runs that move by +1 or -1 into a single slice.
        def _genslice(start, end, step):
            # Render the slice text; start is omitted when 0, the step when
            # 1, and the end bound when end + step would be negative.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged,
                # otherwise emit the finished run and start over.
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE: i is only bound by the loop above, so idxs with fewer than
        # two items leaves it unbound here.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string: character k has code point k.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1337
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature routine in the player JS and wrap it for Python.

    The function name is taken from the ``sig`` group of the first pattern
    that matches *jscode*; that function is then extracted with
    JSInterpreter.  The returned callable takes one string argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    sig_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(sig_name)

    def run_signature(s):
        # The extracted function receives its arguments as a list.
        return js_function([s])

    return run_signature
1361
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is looked up in (or added to) the per-instance
    cache under the key (player URL, signature layout id).  When the
    'youtube_print_sig_code' param is set, the function is also printed.

    Raises ExtractorError when player_url is None, and wraps any failure
    during extraction in an ExtractorError carrying the traceback text.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make protocol-relative and site-relative player URLs absolute.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1388
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl.  Otherwise the URL's
    query gets 'ver' and a random 'cpn' value set and is fetched
    non-fatally (the 'Marking watched' step).
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1413
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced in *webpage*.

    Three kinds of markup are scanned, in this order:
      * player embeds (iframe/embed/object tags, data-video-url
        attributes, embedSWF(...) and new SWFObject(...) calls) pointing
        at youtube.com or youtube-nocookie.com /embed/, /v/ or /p/ URLs;
      * lazyYT elements (``data-youtube-id`` attribute);
      * the Wordpress "YouTube Video Importer" plugin
        (``data-video_id`` attribute).

    Returns a list of strings: HTML-unescaped URLs for the first kind and
    the attribute values for the other two (HTML-unescaped for lazyYT).
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # requires a literal ':' and therefore never matched a real
    # `embedSWF("...")` call; it now matches the opening parenthesis
    # followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one.
    entries.extend(m[-1] for m in matches)

    return entries
1445
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1450
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1458
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the player-overlay part of *data*.

    Returns None when *data* holds no (or an empty) chapter list, otherwise
    a list of dicts with 'start_time', 'end_time' and 'title'.  A chapter
    ends where the next one starts; the last one ends at *duration*.
    Chapters whose start or end time is unknown are left out.
    *video_id* is not used here.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def chapter_start(entry):
        # timeRangeStartMillis is in milliseconds; convert to seconds.
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    start_times = [chapter_start(entry) for entry in raw_chapters]
    # Every chapter ends at its successor's start; the last at `duration`.
    end_times = start_times[1:] + [duration]

    chapters = []
    for entry, begin, finish in zip(raw_chapters, start_times, end_times):
        if begin is None or finish is None:
            continue
        chapters.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1498
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for a JSON blob and parse it non-fatally.

    *regex* followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then *regex* alone; '{}' is parsed when neither matches.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1503
d92f5d5a 1504 @staticmethod
1505 def parse_time_text(time_text):
1506 """
1507 Parse the comment time text
1508 time_text is in the format 'X units ago (edited)'
1509 """
1510 time_text_split = time_text.split(' ')
1511 if len(time_text_split) >= 3:
1512 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1513
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dict items in *runs*.

    Items that are not dicts, or whose 'text' is missing, empty or not a
    string, are skipped.  Returns None when no text was found.
    """
    pieces = []
    for entry in runs:
        if isinstance(entry, dict):
            piece = try_get(entry, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1527
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'.  Otherwise returns a
    dict with the keys 'id', 'text', 'timestamp', 'time_text',
    'like_count', 'is_favorited', 'author', 'author_id',
    'author_thumbnail', 'author_is_uploader' and 'parent' ('root' when no
    *parent* is given).  'timestamp' is None when the published time text
    is missing or cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when the renderer has no published-time runs, and
    # parse_time_text yields None for text it cannot split into
    # 'X units ago'; leave the timestamp unset rather than raising.
    published = self.parse_time_text(time_text) if time_text else None
    timestamp = (
        calendar.timegm(published.timetuple())
        if published is not None else None)
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    # The last thumbnail in the list is the one that gets used.
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1560
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Yields comment dicts produced by _extract_comment. For the top-level
    call (parent is None) it may additionally yield one int: the estimated
    total number of comments read from the comments header.

    root_continuation_data: renderer passed to YoutubeTabIE._extract_continuation
        to obtain the first continuation.
    identity_token / account_syncid: when truthy, sent as request headers.
    session_token_list: one-element list holding the session token; element 0
        is replaced whenever a response carries an 'xsrf_token'.
    parent: id of the parent comment when fetching replies.
    comment_counts: shared list [comments so far, est. total comments,
        current comment thread #]; created when not supplied.

    Raises ExtractorError for non-retryable download errors, when retries
    are exhausted, or when the response signals 'reload' or
    'externalErrorMessage'.
    """

    def extract_thread(parent_renderer):
        # Yield every comment in parent_renderer['contents'], each followed
        # by its replies (fetched through a nested _comment_entries call).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type here, not an additional getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]
    headers = self._DEFAULT_BASIC_API_HEADERS.copy()

    # TODO: Generalize the download code with TabIE
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                # Keep the shared session token in sync with the server
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # The header text is not guaranteed to convert to a number;
                    # only store, report and yield an actual count so neither
                    # the '%d' formatting nor the consumer sees None.
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in this response: `continuation`
            # was not advanced, so looping again would request the very same
            # page indefinitely.
            break
1746
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks contents for the first 'itemSectionRenderer' key of each entry,
    drains _comment_entries for it, and returns a dict with the collected
    'comments' and their 'comment_count'. Ints yielded by _comment_entries
    are taken as the estimated total and only used for the status message.
    """
    renderer_keys = ('itemSectionRenderer',)
    collected = []
    total_estimate = 0
    for entry in contents:
        for key, renderer in entry.items():
            if key in renderer_keys:
                identity_token = self._extract_identity_token(webpage, item_id=video_id)
                account_syncid = self._extract_account_syncid(ytcfg)
                for item in self._comment_entries(
                        renderer,
                        identity_token=identity_token,
                        account_syncid=account_syncid,
                        session_token_list=[xsrf_token]):
                    if isinstance(item, int):
                        total_estimate = item
                    else:
                        collected.append(item)
                # Only the first matching renderer of an entry is processed
                break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1776
def _real_extract(self, url):
    """Extract formats and metadata for the video addressed by url.

    Returns the info dict for the video. Returns a url_result instead when
    the playability status points at a trailer video, and a playlist_result
    when multifeed metadata is present and neither 'noplaylist' nor the
    smuggled 'force_singlefeed' flag is set.

    Raises ExtractorError (expected) when no formats were found and the
    response carries DRM licence info or an error reason; geo restriction
    is reported through raise_geo_restricted.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Player response: embedded in the watch page, else fetched from the API
    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') == 'Sign in to confirm your age':
        pr = self._parse_json(try_get(compat_parse_qs(
            self._download_webpage(
                base_url + 'get_video_info', video_id,
                'Refetching age-gated info webpage',
                'unable to download video info webpage', query={
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                }, fatal=False)),
            lambda x: x['player_response'][0],
            compat_str) or '{}', video_id)
        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text of a renderer: its 'simpleText', else the joined 'runs' texts
        if not x:
            return
        return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats = []
    itags = []
    itag_qualities = {}
    player_url = None
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        quality = fmt.get('quality')
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                if not webpage:
                    continue
                player_url = self._search_regex(
                    r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                    webpage, 'player URL', fatal=False)
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
        }
        mimetype = fmt.get('mimeType')
        if mimetype:
            mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
            if mobj:
                dct['ext'] = mimetype2ext(mobj.group(1))
                dct.update(parse_codecs(mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            itag = self._search_regex(
                r'/itag/(\d+)', f['url'], 'itag', default=None)
            if itag:
                f['format_id'] = itag
            formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    # Not actually useful since the sorting is already done with "quality,res,fps,codec"
                    # but kept to maintain feature parity (and code similarity) with youtube-dl
                    # Remove if this causes any issues with sorting in future
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # reason may be None; report the subreason on its own in that case
            reason = reason + '\n' + subreason if reason else subreason
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            w, h = keyword.split('=')[1].split(':')
            w, h = int(w), int(h)
            if w > 0 and h > 0:
                # float() keeps this a true division on Python 2 as well
                ratio = float(w) / h
                for f in formats:
                    if f.get('vcodec') != 'none':
                        f['stretched_ratio'] = ratio

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # Neither container supplied thumbnails: fall back to the page meta
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    owner_profile_url = microformat.get('ownerProfileUrl')

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # video_details may be empty, so do not index it directly
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, query):
            # One subtitle entry per supported format for lang_code
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                })
            container[lang_code] = lang_subs

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, {})
                continue
            # 'asr' track: offer every translation language as automatic captions
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL fragment or query (fragment wins)
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    if not is_live:
        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            # 'subtitles' is only set above when caption tracks exist
            info.setdefault('subtitles', {})['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

    # The comment post-extractor below reads `contents` later on, so make
    # sure it is always bound to a list.
    contents = []
    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            for engagment_pannel in (initial_data.get('engagementPanels') or []):
                contents = try_get(
                    engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not contents:
                    continue

                def chapter_time(mmlir):
                    return parse_duration(
                        get_text(mmlir.get('timeDescription')))

                chapters = []
                for next_num, content in enumerate(contents, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    # A chapter ends where the next one starts; the last one
                    # ends at the video duration. `or {}` covers a next item
                    # without a macroMarkersListItemRenderer.
                    end_time = chapter_time(try_get(
                        contents, lambda x: x[next_num]['macroMarkersListItemRenderer']) or {}) \
                        if next_num < len(contents) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # like/dislike counts are parsed out of accessibility labels
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    mrr_contents_text = get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        # `or []` keeps `contents` iterable for the comment post-extractor too
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        for content in contents:
            badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
            for badge in badges or []:
                label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
                if label.lower() == 'members only':
                    is_membersonly = True
                    break
                elif label.lower() == 'premium':
                    is_premium = True
                    break
            if is_membersonly or is_premium:
                break

    # TODO: Add this for playlists
    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self._downloader.params.get('writeannotations', False)
    get_comments = self._downloader.params.get('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Comments are fetched lazily: the callable is stored, not invoked here
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 2374
5f6a1245 2375
8bdd16b4 2376class YoutubeTabIE(YoutubeBaseInfoExtractor):
2377 IE_DESC = 'YouTube.com tab'
70d5c17b 2378 _VALID_URL = r'''(?x)
2379 https?://
2380 (?:\w+\.)?
2381 (?:
2382 youtube(?:kids)?\.com|
2383 invidio\.us
2384 )/
2385 (?:
2386 (?:channel|c|user)/|
2387 (?P<not_channel>
9ba5705a 2388 feed/|hashtag/|
70d5c17b 2389 (?:playlist|watch)\?.*?\blist=
2390 )|
29f7c58a 2391 (?!(?:%s)\b) # Direct URLs
70d5c17b 2392 )
2393 (?P<id>[^/?\#&]+)
2394 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2395 IE_NAME = 'youtube:tab'
2396
81127aa5 2397 _TESTS = [{
8bdd16b4 2398 # playlists, multipage
2399 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2400 'playlist_mincount': 94,
2401 'info_dict': {
2402 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2403 'title': 'Игорь Клейнер - Playlists',
2404 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2405 'uploader': 'Игорь Клейнер',
2406 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2407 },
2408 }, {
2409 # playlists, multipage, different order
2410 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2411 'playlist_mincount': 94,
2412 'info_dict': {
2413 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2414 'title': 'Игорь Клейнер - Playlists',
2415 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2416 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2417 'uploader': 'Игорь Клейнер',
8bdd16b4 2418 },
2419 }, {
2420 # playlists, singlepage
2421 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2422 'playlist_mincount': 4,
2423 'info_dict': {
2424 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2425 'title': 'ThirstForScience - Playlists',
2426 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2427 'uploader': 'ThirstForScience',
2428 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2429 }
2430 }, {
2431 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2432 'only_matching': True,
2433 }, {
2434 # basic, single video playlist
0e30a7b9 2435 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2436 'info_dict': {
0e30a7b9 2437 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2438 'uploader': 'Sergey M.',
2439 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2440 'title': 'youtube-dl public playlist',
81127aa5 2441 },
0e30a7b9 2442 'playlist_count': 1,
9291475f 2443 }, {
8bdd16b4 2444 # empty playlist
0e30a7b9 2445 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2446 'info_dict': {
0e30a7b9 2447 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2448 'uploader': 'Sergey M.',
2449 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2450 'title': 'youtube-dl empty playlist',
9291475f
PH
2451 },
2452 'playlist_count': 0,
2453 }, {
8bdd16b4 2454 # Home tab
2455 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2456 'info_dict': {
8bdd16b4 2457 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2458 'title': 'lex will - Home',
2459 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2460 'uploader': 'lex will',
2461 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2462 },
8bdd16b4 2463 'playlist_mincount': 2,
9291475f 2464 }, {
8bdd16b4 2465 # Videos tab
2466 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2467 'info_dict': {
8bdd16b4 2468 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2469 'title': 'lex will - Videos',
2470 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2471 'uploader': 'lex will',
2472 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2473 },
8bdd16b4 2474 'playlist_mincount': 975,
9291475f 2475 }, {
8bdd16b4 2476 # Videos tab, sorted by popular
2477 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2478 'info_dict': {
8bdd16b4 2479 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2480 'title': 'lex will - Videos',
2481 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2482 'uploader': 'lex will',
2483 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2484 },
8bdd16b4 2485 'playlist_mincount': 199,
9291475f 2486 }, {
8bdd16b4 2487 # Playlists tab
2488 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2489 'info_dict': {
8bdd16b4 2490 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2491 'title': 'lex will - Playlists',
2492 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2493 'uploader': 'lex will',
2494 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2495 },
8bdd16b4 2496 'playlist_mincount': 17,
ac7553d0 2497 }, {
8bdd16b4 2498 # Community tab
2499 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2500 'info_dict': {
8bdd16b4 2501 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2502 'title': 'lex will - Community',
2503 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2504 'uploader': 'lex will',
2505 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2506 },
2507 'playlist_mincount': 18,
87dadd45 2508 }, {
8bdd16b4 2509 # Channels tab
2510 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2511 'info_dict': {
8bdd16b4 2512 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2513 'title': 'lex will - Channels',
2514 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2515 'uploader': 'lex will',
2516 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2517 },
deaec5af 2518 'playlist_mincount': 12,
6b08cdf6 2519 }, {
a0566bbf 2520 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2521 'only_matching': True,
2522 }, {
a0566bbf 2523 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2524 'only_matching': True,
2525 }, {
a0566bbf 2526 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2527 'only_matching': True,
2528 }, {
2529 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2530 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2531 'info_dict': {
2532 'title': '29C3: Not my department',
2533 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2534 'uploader': 'Christiaan008',
2535 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2536 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2537 },
2538 'playlist_count': 96,
2539 }, {
2540 'note': 'Large playlist',
2541 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2542 'info_dict': {
8bdd16b4 2543 'title': 'Uploads from Cauchemar',
2544 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2545 'uploader': 'Cauchemar',
2546 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2547 },
8bdd16b4 2548 'playlist_mincount': 1123,
2549 }, {
2550 # even larger playlist, 8832 videos
2551 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2552 'only_matching': True,
4b7df0d3
JMF
2553 }, {
2554 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2555 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2556 'info_dict': {
acf757f4
PH
2557 'title': 'Uploads from Interstellar Movie',
2558 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2559 'uploader': 'Interstellar Movie',
8bdd16b4 2560 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2561 },
481cc733 2562 'playlist_mincount': 21,
8bdd16b4 2563 }, {
2564 # https://github.com/ytdl-org/youtube-dl/issues/21844
2565 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2566 'info_dict': {
2567 'title': 'Data Analysis with Dr Mike Pound',
2568 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2569 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2570 'uploader': 'Computerphile',
deaec5af 2571 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2572 },
2573 'playlist_mincount': 11,
2574 }, {
a0566bbf 2575 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2576 'only_matching': True,
dacb3a86
S
2577 }, {
2578 # Playlist URL that does not actually serve a playlist
2579 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2580 'info_dict': {
2581 'id': 'FqZTN594JQw',
2582 'ext': 'webm',
2583 'title': "Smiley's People 01 detective, Adventure Series, Action",
2584 'uploader': 'STREEM',
2585 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2586 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2587 'upload_date': '20150526',
2588 'license': 'Standard YouTube License',
2589 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2590 'categories': ['People & Blogs'],
2591 'tags': list,
dbdaaa23 2592 'view_count': int,
dacb3a86
S
2593 'like_count': int,
2594 'dislike_count': int,
2595 },
2596 'params': {
2597 'skip_download': True,
2598 },
13a75688 2599 'skip': 'This video is not available.',
dacb3a86 2600 'add_ie': [YoutubeIE.ie_key()],
481cc733 2601 }, {
8bdd16b4 2602 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2603 'only_matching': True,
66b48727 2604 }, {
8bdd16b4 2605 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2606 'only_matching': True,
a0566bbf 2607 }, {
2608 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2609 'info_dict': {
2610 'id': '9Auq9mYxFEE',
2611 'ext': 'mp4',
deaec5af 2612 'title': compat_str,
a0566bbf 2613 'uploader': 'Sky News',
2614 'uploader_id': 'skynews',
2615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2616 'upload_date': '20191102',
deaec5af 2617 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2618 'categories': ['News & Politics'],
2619 'tags': list,
2620 'like_count': int,
2621 'dislike_count': int,
2622 },
2623 'params': {
2624 'skip_download': True,
2625 },
2626 }, {
2627 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2628 'info_dict': {
2629 'id': 'a48o2S1cPoo',
2630 'ext': 'mp4',
2631 'title': 'The Young Turks - Live Main Show',
2632 'uploader': 'The Young Turks',
2633 'uploader_id': 'TheYoungTurks',
2634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2635 'upload_date': '20150715',
2636 'license': 'Standard YouTube License',
2637 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2638 'categories': ['News & Politics'],
2639 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2640 'like_count': int,
2641 'dislike_count': int,
2642 },
2643 'params': {
2644 'skip_download': True,
2645 },
2646 'only_matching': True,
2647 }, {
2648 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2649 'only_matching': True,
2650 }, {
2651 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2652 'only_matching': True,
3d3dddc9 2653 }, {
2654 'url': 'https://www.youtube.com/feed/trending',
2655 'only_matching': True,
2656 }, {
2657 # needs auth
2658 'url': 'https://www.youtube.com/feed/library',
2659 'only_matching': True,
2660 }, {
2661 # needs auth
2662 'url': 'https://www.youtube.com/feed/history',
2663 'only_matching': True,
2664 }, {
2665 # needs auth
2666 'url': 'https://www.youtube.com/feed/subscriptions',
2667 'only_matching': True,
2668 }, {
2669 # needs auth
2670 'url': 'https://www.youtube.com/feed/watch_later',
2671 'only_matching': True,
2672 }, {
2673 # no longer available?
2674 'url': 'https://www.youtube.com/feed/recommended',
2675 'only_matching': True,
29f7c58a 2676 }, {
2677 # inline playlist with not always working continuations
2678 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2679 'only_matching': True,
2680 }, {
2681 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2682 'only_matching': True,
2683 }, {
2684 'url': 'https://www.youtube.com/course',
2685 'only_matching': True,
2686 }, {
2687 'url': 'https://www.youtube.com/zsecurity',
2688 'only_matching': True,
2689 }, {
2690 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2691 'only_matching': True,
2692 }, {
2693 'url': 'https://www.youtube.com/TheYoungTurks/live',
2694 'only_matching': True,
39ed931e 2695 }, {
2696 'url': 'https://www.youtube.com/hashtag/cctv9',
2697 'info_dict': {
2698 'id': 'cctv9',
2699 'title': '#cctv9',
2700 },
2701 'playlist_mincount': 350,
29f7c58a 2702 }]
2703
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE itself accepts is declined here; for every other
    URL the decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2708
2709 def _extract_channel_id(self, webpage):
2710 channel_id = self._html_search_meta(
2711 'channelId', webpage, 'channel id', default=None)
2712 if channel_id:
2713 return channel_id
2714 channel_url = self._html_search_meta(
2715 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2716 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2717 'twitter:app:url:googleplay'), webpage, 'channel url')
2718 return self._search_regex(
2719 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2720 channel_url, 'channel id')
15f6397c 2721
8bdd16b4 2722 @staticmethod
cd7c66cf 2723 def _extract_basic_item_renderer(item):
2724 # Modified from _extract_grid_item_renderer
2725 known_renderers = (
e3c07697 2726 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2727 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2728 )
2729 for key, renderer in item.items():
2730 if key not in known_renderers:
2731 continue
2732 return renderer
8bdd16b4 2733
def _grid_entries(self, grid_renderer):
    """Yield results for the playlists, videos and channels of a grid.

    Walks ``grid_renderer['items']``; items that are not dicts, or whose
    basic renderer cannot be determined, are skipped.  A single renderer may
    produce more than one result if it carries several of the id fields.
    """
    for grid_item in grid_renderer['items']:
        info = (
            self._extract_basic_item_renderer(grid_item)
            if isinstance(grid_item, dict) else None)
        if not isinstance(info, dict):
            continue
        runs_title = try_get(
            info, lambda x: x['title']['runs'][0]['text'], compat_str)
        # playlist
        list_id = info.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=runs_title)
        # video
        if info.get('videoId'):
            yield self._extract_video(info)
        # channel (its title is stored as simpleText rather than runs)
        owner_id = info.get('channelId')
        if owner_id:
            simple_title = try_get(
                info, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2762
3d3dddc9 2763 def _shelf_entries_from_content(self, shelf_renderer):
2764 content = shelf_renderer.get('content')
2765 if not isinstance(content, dict):
8bdd16b4 2766 return
cd7c66cf 2767 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2768 if renderer:
2769 # TODO: add support for nested playlists so each shelf is processed
2770 # as separate playlist
2771 # TODO: this includes only first N items
2772 for entry in self._grid_entries(renderer):
2773 yield entry
2774 renderer = content.get('horizontalListRenderer')
2775 if renderer:
2776 # TODO
2777 pass
8bdd16b4 2778
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the results described by a shelf renderer.

    When the shelf's endpoint resolves to a URL, a result pointing at that
    URL is yielded first (unless skip_channels is set and the URL is a
    '/channels?' link, in which case nothing is yielded at all).  Entries
    found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels.  Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for item in self._shelf_entries_from_content(shelf_renderer):
        yield item
c5e8d7af 2796
8bdd16b4 2797 def _playlist_entries(self, video_list_renderer):
2798 for content in video_list_renderer['contents']:
2799 if not isinstance(content, dict):
2800 continue
2801 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2802 if not isinstance(renderer, dict):
2803 continue
2804 video_id = renderer.get('videoId')
2805 if not video_id:
2806 continue
2807 yield self._extract_video(renderer)
07aeced6 2808
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
2816
8bdd16b4 2817 def _video_entry(self, video_renderer):
2818 video_id = video_renderer.get('videoId')
2819 if video_id:
2820 return self._extract_video(video_renderer)
dacb3a86 2821
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The video attached to the post (if any) is yielded first, followed by a
    URL result for every link in the post text that YoutubeIE can handle.
    A link pointing at the attached video is skipped so it is not yielded
    twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        # Remember the attached video's id; it is needed below to recognise
        # inline links that point at the same video.
        video_id = video_renderer.get('videoId')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the id of the linked video, not that of the attachment.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2850
8bdd16b4 2851 def _post_thread_continuation_entries(self, post_thread_continuation):
2852 contents = post_thread_continuation.get('contents')
2853 if not isinstance(contents, list):
2854 return
2855 for content in contents:
2856 renderer = content.get('backstagePostThreadRenderer')
2857 if not isinstance(renderer, dict):
2858 continue
2859 for entry in self._post_thread_entries(renderer):
2860 yield entry
07aeced6 2861
39ed931e 2862 r''' # unused
2863 def _rich_grid_entries(self, contents):
2864 for content in contents:
2865 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2866 if video_renderer:
2867 entry = self._video_entry(video_renderer)
2868 if entry:
2869 yield entry
2870 '''
2871
29f7c58a 2872 @staticmethod
2873 def _build_continuation_query(continuation, ctp=None):
2874 query = {
2875 'ctoken': continuation,
2876 'continuation': continuation,
2877 }
2878 if ctp:
2879 query['itct'] = ctp
2880 return query
2881
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure is missing or has no 'continuation'
    token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2893
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    'nextContinuationData' is tried first.  Otherwise the renderer's
    'contents' and 'items' lists are scanned for the first
    'continuationItemRenderer' that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    children = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2916
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield every entry of *tab*, following continuation pages.

    tab             -- tab renderer dict; its 'content' is the starting point
    item_id         -- used in the progress note of each continuation request
    identity_token  -- if truthy, sent as the x-youtube-identity-token header
    account_syncid  -- if truthy, sent as X-Goog-PageId (with X-Goog-AuthUser)

    Continuation pages are requested through the 'browse' API endpoint until
    no further continuation can be found or a response contains nothing
    recognisable.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries(), which stores the
    # continuation it finds in slot 0 (Python 2 does not support nonlocal).
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    for page_num in itertools.count(1):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep='browse', fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query={
                        'continuation': continuation['continuation'],
                        # 'itct' is only present when click-tracking params
                        # were found, so it must not be indexed directly.
                        'clickTracking': {'clickTrackingParams': continuation.get('itct')},
                    },
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # First response layout: a 'continuationContents' dict keyed by
        # the kind of continuation.
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that a continuation stored by extract_entries() for
            # this page is not confused with one from a previous page.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: a list of appended continuation items.
        # Each value is (handler, key under which the handler expects the
        # item list).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The kind of the first item decides how the whole list is handled.
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in the response: stop paging.
        break
9558dcec 3071
@staticmethod
def _extract_selected_tab(tabs):
    """Return the 'tabRenderer' of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for candidate in tabs:
        if try_get(candidate, lambda x: x['tabRenderer']['selected'], bool):
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3079
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar.

    Returns a dict with those of 'uploader', 'uploader_id' and
    'uploader_url' that could be determined; keys whose value is None are
    left out.  If several sidebar items name an owner, the last one wins.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        found['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3102
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel/playlist/feed) page.

    Metadata comes from the channel metadata renderer if present, otherwise
    from the playlist metadata renderer.  *item_id* is the fallback playlist
    id, and the hashtag header text or the playlist id is the fallback
    title.  The selected tab's title is appended to the playlist title.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    title = description = None
    tags = []
    raw_thumbnails = []

    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    # The channel id doubles as playlist id; fall back to the requested id
    playlist_id = item_id if channel_id is None else channel_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the final uploader fields into the channel fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3173
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a mix playlist, page after page.

    Each page is the playlist panel of a watch page.  The next page is
    requested for the last video seen, and only the videos following that
    one are yielded from it.  Iteration stops when a page brings no new
    videos, when the first video shows up again, or when a watch page has no
    playlist panel.
    """
    first_id = last_id = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']

        _, data = self._extract_webpage(
            'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
            '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # try_get() returns None when the page has no playlist panel; stop
        # here instead of letting _playlist_entries() fail on it.
        if not playlist or not playlist.get('contents'):
            return
3196
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Build the result for a watch page that carries a playlist renderer.

    When the renderer's endpoint resolves to a URL different from ``url``
    the work is handed over to YoutubeTabIE; otherwise the playlist is
    extracted here as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3214
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in ``data['alerts']``.

    Alerts whose type is not ``error`` (case-insensitive), and every error
    alert except the last one, are reported as warnings.  The last error
    alert is raised.

    :param data: parsed initial-data dict; a missing or non-list
        ``alerts`` entry is treated as "no alerts".
    :param expected: forwarded to ``ExtractorError`` when an error alert
        is raised.
    :raises ExtractorError: if at least one alert of type ``error`` exists.
    """

    def _real_extract_alerts():
        """Yield one ``(alert_type, message)`` pair per alert with text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed renderers instead of failing on ``.get``
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                # The text is either a single ``simpleText`` or a list of
                # ``runs``; build the message once so that an alert is
                # never yielded twice.
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without a string ``text`` contributes nothing
                    # (previously ``str += None`` raised TypeError here).
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # Only the last error is raised; the earlier ones are downgraded to
    # warnings so that none of them is lost.
    for alert_type, alert_message in (warnings + errors[:-1]):
        self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3245
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page.  The download
    is repeated, up to the ``extractor_retries`` parameter (default 3),
    while the data has neither ``contents`` nor ``currentVideoEndpoint``.
    Alerts found in the data are handled on every attempt.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
            retry_suffix = ' (retry #%d)' % attempt
        else:
            retry_suffix = ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)

        is_complete = data.get('contents') or data.get('currentVideoEndpoint')
        if is_complete:
            break
        # Out of attempts: report the problem but still return what we got
        if attempt >= max_retries:
            self._downloader.report_error(last_error)
    return webpage, data
3266
def _real_extract(self, url):
    """Extract a tab, playlist or watch URL.

    The URL is first normalised (host forced to www.youtube.com, bare
    channel pages redirected to their "videos" tab, watch URLs without a
    video id turned into playlist URLs).  The downloaded page is then
    dispatched on the shape of its initial data: tabs, a watch-page
    playlist, or a single video.

    :raises ExtractorError: when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    bare_page_re = r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL
    match = re.match(bare_page_re, url)
    groups = match.groupdict() if match else {}
    not_channel = groups.get('not_channel')
    if groups and not not_channel:
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    is_watch_url = (not_channel or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # Channel pages and regular playlists are rendered as tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # Watch pages carry the playlist next to the player
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fall back to the single video the page points at, if any
    endpoint_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = endpoint_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3320
c5e8d7af 3321
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and playlist-bearing URLs; delegate to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    # Verbose pattern: the host/path prefix is optional so that a bare
    # playlist id matches as well.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that YoutubeTabIE accepts; otherwise use the default check."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a ``/playlist`` URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Keep the original query string when there is one; a bare id has
        # none, so build the minimal ``list=`` query instead.
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not query:
            query = {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3396
3397
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3436
3437
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's page URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/user/<name>`` URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3451
b05654f0 3452
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` shorthand to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3470
3471
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield video results for ``query``, stopping once ``n`` were produced.

        Pages are requested from the ``search`` API endpoint one after the
        other; the generator also ends when a request returns nothing,
        when a page has no result sections, or when no continuation token
        is found.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=payload)
            if not response:
                return

            # The first page and the continuation pages nest the result
            # sections differently; both locations are tried.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3540
c9ae7b95 3541
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the ``ytsearchdate`` keyword."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the ``params`` field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3547
c9ae7b95 3548
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract the results of a ``youtube.com/results`` search URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search terms come from ``search_query`` (or ``q``); the ``sp``
        parameter, when present, is stored as the search params for the
        request.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3574
3575
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, built from the subclass's ``_FEED_NAME``."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<name>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3595
3596
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shorthand to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3609
3610
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com home page URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3625
1ed5b5c9 3626
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` shorthand."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3638
1ed5b5c9 3639
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` shorthand."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3648
3649
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain how to quote the URL."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose pattern: a watch URL that ends after at most one of the listed
    # non-video parameters, or an attribution link with a single parameter.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about shell quoting.

        :raises ExtractorError: unconditionally, marked as expected.
        """
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3697
3698
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value has only 1 to 10 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail, naming the incomplete id and the offending URL.

        :raises ExtractorError: unconditionally, marked as expected.
        """
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)