jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Ignore invalid stretch ratio
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
c5e8d7af 32 ExtractorError,
b60419c5 33 format_field,
2d30521a 34 float_or_none,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
6310acf5 37 parse_codecs,
7c80519c 38 parse_duration,
dca3ff4a 39 qualities,
3995d37d 40 remove_start,
cf7e015f 41 smuggle_url,
dbdaaa23 42 str_or_none,
c93d53f5 43 str_to_int,
556dbe7f 44 try_get,
c5e8d7af
PH
45 unescapeHTML,
46 unified_strdate,
cf7e015f 47 unsmuggle_url,
8bdd16b4 48 update_url_query,
21c340b8 49 url_or_none,
6e6bc8da 50 urlencode_postdata,
d92f5d5a 51 urljoin
c5e8d7af
PH
52)
53
5f6a1245 54
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google sign-in page whose hidden form inputs seed the login requests in _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints hit by _login(): account lookup, password challenge, and
    # TFA code submission ({0} is filled with the "TL" token)
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names
    # NOTE(review): presumably first path segments that must not be treated
    # as channel/user names - confirm against the extractors that use it
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex for playlist IDs: a known prefix followed by 10+ ID characters,
    # or one of the fixed IDs RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 74
25f14e9f
S
75 def _ids_to_results(self, ids):
76 return [
77 self.url_result(vid_id, 'Youtube', video_id=vid_id)
78 for vid_id in ids]
79
b2e8bc1b 80 def _login(self):
83317f69 81 """
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
85
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
87 """
68217024 88 username, password = self._get_login_info()
b2e8bc1b
JMF
89 # No authentication to be performed
90 if username is None:
70d35d16 91 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
69ea8ca4 92 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
545cc85d 93 # if self._downloader.params.get('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
94 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 95 return True
b2e8bc1b 96
7cc3570e
PH
97 login_page = self._download_webpage(
98 self._LOGIN_URL, None,
69ea8ca4
PH
99 note='Downloading login page',
100 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
101 if login_page is False:
102 return
b2e8bc1b 103
1212e997 104 login_form = self._hidden_inputs(login_page)
c5e8d7af 105
e00eb564
S
106 def req(url, f_req, note, errnote):
107 data = login_form.copy()
108 data.update({
109 'pstMsg': 1,
110 'checkConnection': 'youtube',
111 'checkedDomains': 'youtube',
112 'hl': 'en',
113 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 114 'f.req': json.dumps(f_req),
e00eb564
S
115 'flowName': 'GlifWebSignIn',
116 'flowEntry': 'ServiceLogin',
baf67a60
S
117 # TODO: reverse actual botguard identifier generation algo
118 'bgRequest': '["identifier",""]',
041bc3ad 119 })
e00eb564
S
120 return self._download_json(
121 url, None, note=note, errnote=errnote,
122 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
123 fatal=False,
124 data=urlencode_postdata(data), headers={
125 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
126 'Google-Accounts-XSRF': 1,
127 })
128
3995d37d
S
129 def warn(message):
130 self._downloader.report_warning(message)
131
132 lookup_req = [
133 username,
134 None, [], None, 'US', None, None, 2, False, True,
135 [
136 None, None,
137 [2, 1, None, 1,
138 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
139 None, [], 4],
140 1, [None, None, []], None, None, None, True
141 ],
142 username,
143 ]
144
e00eb564 145 lookup_results = req(
3995d37d 146 self._LOOKUP_URL, lookup_req,
e00eb564
S
147 'Looking up account info', 'Unable to look up account info')
148
149 if lookup_results is False:
150 return False
041bc3ad 151
3995d37d
S
152 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
153 if not user_hash:
154 warn('Unable to extract user hash')
155 return False
156
157 challenge_req = [
158 user_hash,
159 None, 1, None, [1, None, None, None, [password, None, True]],
160 [
161 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
162 1, [None, None, []], None, None, None, True
163 ]]
83317f69 164
3995d37d
S
165 challenge_results = req(
166 self._CHALLENGE_URL, challenge_req,
167 'Logging in', 'Unable to log in')
83317f69 168
3995d37d 169 if challenge_results is False:
e00eb564 170 return
83317f69 171
3995d37d
S
172 login_res = try_get(challenge_results, lambda x: x[0][5], list)
173 if login_res:
174 login_msg = try_get(login_res, lambda x: x[5], compat_str)
175 warn(
176 'Unable to login: %s' % 'Invalid password'
177 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
178 return False
179
180 res = try_get(challenge_results, lambda x: x[0][-1], list)
181 if not res:
182 warn('Unable to extract result entry')
183 return False
184
9a6628aa
S
185 login_challenge = try_get(res, lambda x: x[0][0], list)
186 if login_challenge:
187 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
188 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
189 # SEND_SUCCESS - TFA code has been successfully sent to phone
190 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 191 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
192 if status == 'QUOTA_EXCEEDED':
193 warn('Exceeded the limit of TFA codes, try later')
194 return False
195
196 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
197 if not tl:
198 warn('Unable to extract TL')
199 return False
200
201 tfa_code = self._get_tfa_info('2-step verification code')
202
203 if not tfa_code:
204 warn(
205 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
206 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
207 return False
208
209 tfa_code = remove_start(tfa_code, 'G-')
210
211 tfa_req = [
212 user_hash, None, 2, None,
213 [
214 9, None, None, None, None, None, None, None,
215 [None, tfa_code, True, 2]
216 ]]
217
218 tfa_results = req(
219 self._TFA_URL.format(tl), tfa_req,
220 'Submitting TFA code', 'Unable to submit TFA code')
221
222 if tfa_results is False:
223 return False
224
225 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
226 if tfa_res:
227 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
228 warn(
229 'Unable to finish TFA: %s' % 'Invalid TFA code'
230 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
231 return False
232
233 check_cookie_url = try_get(
234 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
235 else:
236 CHALLENGES = {
237 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
238 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
239 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
240 }
241 challenge = CHALLENGES.get(
242 challenge_str,
243 '%s returned error %s.' % (self.IE_NAME, challenge_str))
244 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
245 return False
3995d37d
S
246 else:
247 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
248
249 if not check_cookie_url:
250 warn('Unable to extract CheckCookie URL')
251 return False
e00eb564
S
252
253 check_cookie_results = self._download_webpage(
3995d37d
S
254 check_cookie_url, None, 'Checking cookie', fatal=False)
255
256 if check_cookie_results is False:
257 return False
e00eb564 258
3995d37d
S
259 if 'https://myaccount.google.com/' not in check_cookie_results:
260 warn('Unable to log in')
b2e8bc1b 261 return False
e00eb564 262
b2e8bc1b
JMF
263 return True
264
cce889b9 265 def _initialize_consent(self):
266 cookies = self._get_cookies('https://www.youtube.com/')
267 if cookies.get('__Secure-3PSID'):
268 return
269 consent_id = None
270 consent = cookies.get('CONSENT')
271 if consent:
272 if 'YES' in consent.value:
273 return
274 consent_id = self._search_regex(
275 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
276 if not consent_id:
277 consent_id = random.randint(100, 999)
278 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 279
b2e8bc1b 280 def _real_initialize(self):
cce889b9 281 self._initialize_consent()
b2e8bc1b
JMF
282 if self._downloader is None:
283 return
b2e8bc1b
JMF
284 if not self._login():
285 return
c5e8d7af 286
    # Fallback client version, used when ytcfg has no INNERTUBE_CLIENT_VERSION
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    # Fallback API key, used when ytcfg has no INNERTUBE_API_KEY
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    # Capture the JSON object assigned to ytInitialData / window["ytInitialData"]
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Capture the JSON object assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after the JSON object; appended to the patterns above
    # so the non-greedy match does not stop at an earlier '};' inside the JSON
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 292
a5c56234
M
293 def _generate_sapisidhash_header(self):
294 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
295 if sapisid_cookie is None:
296 return
297 time_now = round(time.time())
298 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
299 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
300
301 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 302 note='Downloading API JSON', errnote='Unable to download API page',
303 context=None, api_key=None):
304
305 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 306 data.update(query)
f4f751af 307 real_headers = self._generate_api_headers()
308 real_headers.update({'content-type': 'application/json'})
309 if headers:
310 real_headers.update(headers)
545cc85d 311 return self._download_json(
a5c56234
M
312 'https://www.youtube.com/youtubei/v1/%s' % ep,
313 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 314 data=json.dumps(data).encode('utf8'), headers=real_headers,
315 query={'key': api_key or self._extract_api_key()})
316
317 def _extract_api_key(self, ytcfg=None):
318 return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY
c54f4aad 319
8bdd16b4 320 def _extract_yt_initial_data(self, video_id, webpage):
321 return self._parse_json(
322 self._search_regex(
29f7c58a 323 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 324 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 325 video_id)
0c148415 326
a1c5d2ca
M
327 def _extract_identity_token(self, webpage, item_id):
328 ytcfg = self._extract_ytcfg(item_id, webpage)
329 if ytcfg:
330 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
331 if token:
332 return token
333 return self._search_regex(
334 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
335 'identity token', default=None)
336
337 @staticmethod
338 def _extract_account_syncid(data):
8ea3f7b9 339 """
340 Extract syncId required to download private playlists of secondary channels
341 @param data Either response or ytcfg
342 """
343 sync_ids = (try_get(
344 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
345 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
a1c5d2ca
M
346 if len(sync_ids) >= 2 and sync_ids[1]:
347 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
348 # and just "user_syncid||" for primary channel. We only want the channel_syncid
349 return sync_ids[0]
8ea3f7b9 350 # ytcfg includes channel_syncid if on secondary channel
351 return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 352
29f7c58a 353 def _extract_ytcfg(self, video_id, webpage):
354 return self._parse_json(
355 self._search_regex(
356 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 357 default='{}'), video_id, fatal=False) or {}
358
359 def __extract_client_version(self, ytcfg):
360 return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION
361
362 def _extract_context(self, ytcfg=None):
363 context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
364 if context:
365 return context
366
367 # Recreate the client context (required)
368 client_version = self.__extract_client_version(ytcfg)
369 client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
370 context = {
371 'client': {
372 'clientName': client_name,
373 'clientVersion': client_version,
374 }
375 }
376 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
377 if visitor_data:
378 context['client']['visitorData'] = visitor_data
379 return context
380
381 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
382 headers = {
383 'X-YouTube-Client-Name': '1',
384 'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
385 }
386 if identity_token:
387 headers['x-youtube-identity-token'] = identity_token
388 if account_syncid:
389 headers['X-Goog-PageId'] = account_syncid
390 headers['X-Goog-AuthUser'] = 0
391 if visitor_data:
392 headers['x-goog-visitor-id'] = visitor_data
393 auth = self._generate_sapisidhash_header()
394 if auth is not None:
395 headers['Authorization'] = auth
396 headers['X-Origin'] = 'https://www.youtube.com'
397 return headers
29f7c58a 398
30a074c2 399 def _extract_video(self, renderer):
400 video_id = renderer.get('videoId')
401 title = try_get(
402 renderer,
403 (lambda x: x['title']['runs'][0]['text'],
404 lambda x: x['title']['simpleText']), compat_str)
405 description = try_get(
406 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
407 compat_str)
408 duration = parse_duration(try_get(
409 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
410 view_count_text = try_get(
411 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
412 view_count = str_to_int(self._search_regex(
413 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
414 'view count', default=None))
415 uploader = try_get(
bc2ca1bb 416 renderer,
417 (lambda x: x['ownerText']['runs'][0]['text'],
418 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
30a074c2 419 return {
39ed931e 420 '_type': 'url',
30a074c2 421 'ie_key': YoutubeIE.ie_key(),
422 'id': video_id,
423 'url': video_id,
424 'title': title,
425 'description': description,
426 'duration': duration,
427 'view_count': view_count,
428 'uploader': uploader,
429 }
430
0c148415 431
360e1ca5 432class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 433 IE_DESC = 'YouTube.com'
bc2ca1bb 434 _INVIDIOUS_SITES = (
435 # invidious-redirect websites
436 r'(?:www\.)?redirect\.invidious\.io',
437 r'(?:(?:www|dev)\.)?invidio\.us',
438 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
439 r'(?:www\.)?invidious\.pussthecat\.org',
440 r'(?:www\.)?invidious\.048596\.xyz',
441 r'(?:www\.)?invidious\.zee\.li',
442 r'(?:www\.)?vid\.puffyan\.us',
443 r'(?:(?:www|au)\.)?ytprivate\.com',
444 r'(?:www\.)?invidious\.namazso\.eu',
445 r'(?:www\.)?invidious\.ethibox\.fr',
446 r'(?:www\.)?inv\.skyn3t\.in',
447 r'(?:www\.)?invidious\.himiko\.cloud',
448 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
449 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
450 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
451 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
452 # youtube-dl invidious instances list
453 r'(?:(?:www|no)\.)?invidiou\.sh',
454 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
455 r'(?:www\.)?invidious\.kabi\.tk',
456 r'(?:www\.)?invidious\.13ad\.de',
457 r'(?:www\.)?invidious\.mastodon\.host',
458 r'(?:www\.)?invidious\.zapashcanon\.fr',
459 r'(?:www\.)?invidious\.kavin\.rocks',
460 r'(?:www\.)?invidious\.tube',
461 r'(?:www\.)?invidiou\.site',
462 r'(?:www\.)?invidious\.site',
463 r'(?:www\.)?invidious\.xyz',
464 r'(?:www\.)?invidious\.nixnet\.xyz',
465 r'(?:www\.)?invidious\.drycat\.fr',
466 r'(?:www\.)?tube\.poal\.co',
467 r'(?:www\.)?tube\.connect\.cafe',
468 r'(?:www\.)?vid\.wxzm\.sx',
469 r'(?:www\.)?vid\.mint\.lgbt',
470 r'(?:www\.)?yewtu\.be',
471 r'(?:www\.)?yt\.elukerio\.org',
472 r'(?:www\.)?yt\.lelux\.fi',
473 r'(?:www\.)?invidious\.ggc-project\.de',
474 r'(?:www\.)?yt\.maisputain\.ovh',
475 r'(?:www\.)?invidious\.toot\.koeln',
476 r'(?:www\.)?invidious\.fdn\.fr',
477 r'(?:www\.)?watch\.nettohikari\.com',
478 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
479 r'(?:www\.)?qklhadlycap4cnod\.onion',
480 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
481 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
482 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
483 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
484 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
485 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
486 )
cb7dfeea 487 _VALID_URL = r"""(?x)^
c5e8d7af 488 (
edb53e2d 489 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 490 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
491 (?:www\.)?deturl\.com/www\.youtube\.com|
492 (?:www\.)?pwnyoutube\.com|
493 (?:www\.)?hooktube\.com|
494 (?:www\.)?yourepeat\.com|
495 tube\.majestyc\.net|
496 %(invidious)s|
497 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
498 (?:.*?\#/)? # handle anchor (#/) redirect urls
499 (?: # the various things that can precede the ID:
ac7553d0 500 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 501 |(?: # or the v= param in all its forms
f7000f3a 502 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 503 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 504 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
505 v=
506 )
f4b05232 507 ))
cbaed4bb
S
508 |(?:
509 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
510 vid\.plus| # or vid.plus/xxxx
511 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 512 %(invidious)s
cbaed4bb 513 )/
edb53e2d 514 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 515 )
c5e8d7af 516 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 517 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
518 (?!.*?\blist=
519 (?:
520 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
521 WL # WL are handled by the watch later IE
522 )
523 )
c5e8d7af 524 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 525 $""" % {
526 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
527 'invidious': '|'.join(_INVIDIOUS_SITES),
528 }
e40c758c 529 _PLAYER_INFO_RE = (
cc2db878 530 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
531 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 532 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 533 )
2c62dc26 534 _formats = {
c2d3cb4c 535 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
536 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
537 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
538 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
539 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
540 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
541 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
542 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 543 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 544 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
545 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
546 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
547 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
548 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
549 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 550 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 551 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
552 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 553
554
555 # 3D videos
c2d3cb4c 556 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
557 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
558 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
559 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 560 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
561 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
562 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 563
96fb5605 564 # Apple HTTP Live Streaming
11f12195 565 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 566 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
567 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
568 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
569 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
570 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 571 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
572 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
573
574 # DASH mp4 video
d23028a8
S
575 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
576 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 580 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
581 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
583 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
585 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
586 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 587
f6f1fc92 588 # Dash mp4 audio
d23028a8
S
589 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
590 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
591 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
592 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
593 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
594 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
595 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
596
597 # Dash webm
d23028a8
S
598 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
605 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 613 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
614 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
615 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
616 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
617 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
619 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
620
621 # Dash webm audio
d23028a8
S
622 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
623 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 624
0857baad 625 # Dash webm audio with opus inside
d23028a8
S
626 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
627 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
628 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 629
ce6b9a2d
PH
630 # RTMP (unnamed)
631 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
632
633 # av01 video only formats sometimes served with "unknown" codecs
634 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
635 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
636 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 638 }
29f7c58a 639 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 640
fd5c4aab
S
641 _GEO_BYPASS = False
642
78caa52a 643 IE_NAME = 'youtube'
2eb88d95
PH
644 _TESTS = [
645 {
2d3d2997 646 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
647 'info_dict': {
648 'id': 'BaW_jenozKc',
649 'ext': 'mp4',
3867038a 650 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
651 'uploader': 'Philipp Hagemeister',
652 'uploader_id': 'phihag',
ec85ded8 653 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
654 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
655 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 656 'upload_date': '20121002',
3867038a 657 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 658 'categories': ['Science & Technology'],
3867038a 659 'tags': ['youtube-dl'],
556dbe7f 660 'duration': 10,
dbdaaa23 661 'view_count': int,
3e7c1224
PH
662 'like_count': int,
663 'dislike_count': int,
7c80519c 664 'start_time': 1,
297a564b 665 'end_time': 9,
2eb88d95 666 }
0e853ca4 667 },
fccd3771 668 {
4bc3a23e
PH
669 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
670 'note': 'Embed-only video (#1746)',
671 'info_dict': {
672 'id': 'yZIXLfi8CZQ',
673 'ext': 'mp4',
674 'upload_date': '20120608',
675 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
676 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
677 'uploader': 'SET India',
94bfcd23 678 'uploader_id': 'setindia',
ec85ded8 679 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 680 'age_limit': 18,
545cc85d 681 },
682 'skip': 'Private video',
fccd3771 683 },
11b56058 684 {
8bdd16b4 685 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
686 'note': 'Use the first video ID in the URL',
687 'info_dict': {
688 'id': 'BaW_jenozKc',
689 'ext': 'mp4',
3867038a 690 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
691 'uploader': 'Philipp Hagemeister',
692 'uploader_id': 'phihag',
ec85ded8 693 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 694 'upload_date': '20121002',
3867038a 695 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 696 'categories': ['Science & Technology'],
3867038a 697 'tags': ['youtube-dl'],
556dbe7f 698 'duration': 10,
dbdaaa23 699 'view_count': int,
11b56058
PM
700 'like_count': int,
701 'dislike_count': int,
34a7de29
S
702 },
703 'params': {
704 'skip_download': True,
705 },
11b56058 706 },
dd27fd17 707 {
2d3d2997 708 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
709 'note': '256k DASH audio (format 141) via DASH manifest',
710 'info_dict': {
711 'id': 'a9LDPn-MO4I',
712 'ext': 'm4a',
713 'upload_date': '20121002',
714 'uploader_id': '8KVIDEO',
ec85ded8 715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
716 'description': '',
717 'uploader': '8KVIDEO',
718 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 719 },
4bc3a23e
PH
720 'params': {
721 'youtube_include_dash_manifest': True,
722 'format': '141',
4919603f 723 },
de3c7fe0 724 'skip': 'format 141 not served anymore',
dd27fd17 725 },
8bdd16b4 726 # DASH manifest with encrypted signature
727 {
728 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
729 'info_dict': {
730 'id': 'IB3lcPjvWLA',
731 'ext': 'm4a',
732 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
733 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
734 'duration': 244,
735 'uploader': 'AfrojackVEVO',
736 'uploader_id': 'AfrojackVEVO',
737 'upload_date': '20131011',
cc2db878 738 'abr': 129.495,
8bdd16b4 739 },
740 'params': {
741 'youtube_include_dash_manifest': True,
742 'format': '141/bestaudio[ext=m4a]',
743 },
744 },
aa79ac0c
PH
745 # Controversy video
746 {
747 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
748 'info_dict': {
749 'id': 'T4XJQO3qol8',
750 'ext': 'mp4',
556dbe7f 751 'duration': 219,
aa79ac0c 752 'upload_date': '20100909',
4fe54c12 753 'uploader': 'Amazing Atheist',
aa79ac0c 754 'uploader_id': 'TheAmazingAtheist',
ec85ded8 755 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 756 'title': 'Burning Everyone\'s Koran',
545cc85d 757 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 758 }
c522adb1 759 },
dd2d55f1 760 # Normal age-gate video (embed allowed)
c522adb1 761 {
2d3d2997 762 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
763 'info_dict': {
764 'id': 'HtVdAasjOgU',
765 'ext': 'mp4',
766 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 767 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 768 'duration': 142,
c522adb1
JMF
769 'uploader': 'The Witcher',
770 'uploader_id': 'WitcherGame',
ec85ded8 771 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 772 'upload_date': '20140605',
34952f09 773 'age_limit': 18,
c522adb1
JMF
774 },
775 },
8bdd16b4 776 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
777 # YouTube Red ad is not captured for creator
778 {
779 'url': '__2ABJjxzNo',
780 'info_dict': {
781 'id': '__2ABJjxzNo',
782 'ext': 'mp4',
783 'duration': 266,
784 'upload_date': '20100430',
785 'uploader_id': 'deadmau5',
786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 787 'creator': 'deadmau5',
788 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 789 'uploader': 'deadmau5',
790 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 791 'alt_title': 'Some Chords',
8bdd16b4 792 },
793 'expected_warnings': [
794 'DASH manifest missing',
795 ]
796 },
067aa17e 797 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
798 {
799 'url': 'lqQg6PlCWgI',
800 'info_dict': {
801 'id': 'lqQg6PlCWgI',
802 'ext': 'mp4',
556dbe7f 803 'duration': 6085,
90227264 804 'upload_date': '20150827',
cbe2bd91 805 'uploader_id': 'olympic',
ec85ded8 806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 807 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 808 'uploader': 'Olympic',
cbe2bd91
PH
809 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
810 },
811 'params': {
812 'skip_download': 'requires avconv',
e52a40ab 813 }
cbe2bd91 814 },
6271f1ca
PH
815 # Non-square pixels
816 {
817 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
818 'info_dict': {
819 'id': '_b-2C3KPAM0',
820 'ext': 'mp4',
821 'stretched_ratio': 16 / 9.,
556dbe7f 822 'duration': 85,
6271f1ca
PH
823 'upload_date': '20110310',
824 'uploader_id': 'AllenMeow',
ec85ded8 825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 826 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 827 'uploader': '孫ᄋᄅ',
6271f1ca
PH
828 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
829 },
06b491eb
S
830 },
831 # url_encoded_fmt_stream_map is empty string
832 {
833 'url': 'qEJwOuvDf7I',
834 'info_dict': {
835 'id': 'qEJwOuvDf7I',
f57b7835 836 'ext': 'webm',
06b491eb
S
837 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
838 'description': '',
839 'upload_date': '20150404',
840 'uploader_id': 'spbelect',
841 'uploader': 'Наблюдатели Петербурга',
842 },
843 'params': {
844 'skip_download': 'requires avconv',
e323cf3f
S
845 },
846 'skip': 'This live event has ended.',
06b491eb 847 },
067aa17e 848 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
849 {
850 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
851 'info_dict': {
852 'id': 'FIl7x6_3R5Y',
eb6793ba 853 'ext': 'webm',
da77d856
S
854 'title': 'md5:7b81415841e02ecd4313668cde88737a',
855 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 856 'duration': 220,
da77d856
S
857 'upload_date': '20150625',
858 'uploader_id': 'dorappi2000',
ec85ded8 859 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 860 'uploader': 'dorappi2000',
eb6793ba 861 'formats': 'mincount:31',
da77d856 862 },
eb6793ba 863 'skip': 'not actual anymore',
2ee8f5d8 864 },
8a1a26ce
YCH
865 # DASH manifest with segment_list
866 {
867 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
868 'md5': '8ce563a1d667b599d21064e982ab9e31',
869 'info_dict': {
870 'id': 'CsmdDsKjzN8',
871 'ext': 'mp4',
17ee98e1 872 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
873 'uploader': 'Airtek',
874 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
875 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
876 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
877 },
878 'params': {
879 'youtube_include_dash_manifest': True,
880 'format': '135', # bestvideo
be49068d
S
881 },
882 'skip': 'This live event has ended.',
2ee8f5d8 883 },
cf7e015f
S
884 {
885 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 886 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 887 'info_dict': {
545cc85d 888 'id': 'jvGDaLqkpTg',
889 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
890 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
891 },
892 'playlist': [{
893 'info_dict': {
545cc85d 894 'id': 'jvGDaLqkpTg',
cf7e015f 895 'ext': 'mp4',
545cc85d 896 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
897 'description': 'md5:e03b909557865076822aa169218d6a5d',
898 'duration': 10643,
899 'upload_date': '20161111',
900 'uploader': 'Team PGP',
901 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
903 },
904 }, {
905 'info_dict': {
545cc85d 906 'id': '3AKt1R1aDnw',
cf7e015f 907 'ext': 'mp4',
545cc85d 908 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
909 'description': 'md5:e03b909557865076822aa169218d6a5d',
910 'duration': 10991,
911 'upload_date': '20161111',
912 'uploader': 'Team PGP',
913 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
914 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
915 },
916 }, {
917 'info_dict': {
545cc85d 918 'id': 'RtAMM00gpVc',
cf7e015f 919 'ext': 'mp4',
545cc85d 920 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
921 'description': 'md5:e03b909557865076822aa169218d6a5d',
922 'duration': 10995,
923 'upload_date': '20161111',
924 'uploader': 'Team PGP',
925 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
926 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
927 },
928 }, {
929 'info_dict': {
545cc85d 930 'id': '6N2fdlP3C5U',
cf7e015f 931 'ext': 'mp4',
545cc85d 932 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
933 'description': 'md5:e03b909557865076822aa169218d6a5d',
934 'duration': 10990,
935 'upload_date': '20161111',
936 'uploader': 'Team PGP',
937 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
939 },
940 }],
941 'params': {
942 'skip_download': True,
943 },
cbaed4bb 944 },
f9f49d87 945 {
067aa17e 946 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
947 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
948 'info_dict': {
949 'id': 'gVfLd0zydlo',
950 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
951 },
952 'playlist_count': 2,
be49068d 953 'skip': 'Not multifeed anymore',
f9f49d87 954 },
cbaed4bb 955 {
2d3d2997 956 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 957 'only_matching': True,
0e49d9a6 958 },
6d4fc66b 959 {
2d3d2997 960 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
961 'only_matching': True,
962 },
0e49d9a6 963 {
067aa17e 964 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 965 # Also tests cut-off URL expansion in video description (see
067aa17e
S
966 # https://github.com/ytdl-org/youtube-dl/issues/1892,
967 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
968 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
969 'info_dict': {
970 'id': 'lsguqyKfVQg',
971 'ext': 'mp4',
972 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 973 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 974 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 975 'duration': 133,
0e49d9a6
LL
976 'upload_date': '20151119',
977 'uploader_id': 'IronSoulElf',
ec85ded8 978 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 979 'uploader': 'IronSoulElf',
eb6793ba
S
980 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
981 'track': 'Dark Walk - Position Music',
982 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 983 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
984 },
985 'params': {
986 'skip_download': True,
987 },
988 },
61f92af1 989 {
067aa17e 990 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
991 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
992 'only_matching': True,
993 },
313dfc45
LL
994 {
995 # Video with yt:stretch=17:0
996 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
997 'info_dict': {
998 'id': 'Q39EVAstoRM',
999 'ext': 'mp4',
1000 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1001 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1002 'upload_date': '20151107',
1003 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1004 'uploader': 'CH GAMER DROID',
1005 },
1006 'params': {
1007 'skip_download': True,
1008 },
be49068d 1009 'skip': 'This video does not exist.',
313dfc45 1010 },
7caf9830
S
1011 {
1012 # Video licensed under Creative Commons
1013 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1014 'info_dict': {
1015 'id': 'M4gD1WSo5mA',
1016 'ext': 'mp4',
1017 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1018 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1019 'duration': 721,
7caf9830
S
1020 'upload_date': '20150127',
1021 'uploader_id': 'BerkmanCenter',
ec85ded8 1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1023 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1024 'license': 'Creative Commons Attribution license (reuse allowed)',
1025 },
1026 'params': {
1027 'skip_download': True,
1028 },
1029 },
fd050249
S
1030 {
1031 # Channel-like uploader_url
1032 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1033 'info_dict': {
1034 'id': 'eQcmzGIKrzg',
1035 'ext': 'mp4',
1036 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1037 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1038 'duration': 4060,
fd050249 1039 'upload_date': '20151119',
eb6793ba 1040 'uploader': 'Bernie Sanders',
fd050249 1041 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1042 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1043 'license': 'Creative Commons Attribution license (reuse allowed)',
1044 },
1045 'params': {
1046 'skip_download': True,
1047 },
1048 },
040ac686
S
1049 {
1050 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1051 'only_matching': True,
7f29cf54
S
1052 },
1053 {
067aa17e 1054 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1055 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1056 'only_matching': True,
6496ccb4
S
1057 },
1058 {
1059 # Rental video preview
1060 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1061 'info_dict': {
1062 'id': 'uGpuVWrhIzE',
1063 'ext': 'mp4',
1064 'title': 'Piku - Trailer',
1065 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1066 'upload_date': '20150811',
1067 'uploader': 'FlixMatrix',
1068 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1070 'license': 'Standard YouTube License',
1071 },
1072 'params': {
1073 'skip_download': True,
1074 },
eb6793ba 1075 'skip': 'This video is not available.',
022a5d66 1076 },
12afdc2a
S
1077 {
1078 # YouTube Red video with episode data
1079 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1080 'info_dict': {
1081 'id': 'iqKdEhx-dD4',
1082 'ext': 'mp4',
1083 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1084 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1085 'duration': 2085,
12afdc2a
S
1086 'upload_date': '20170118',
1087 'uploader': 'Vsauce',
1088 'uploader_id': 'Vsauce',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1090 'series': 'Mind Field',
1091 'season_number': 1,
1092 'episode_number': 1,
1093 },
1094 'params': {
1095 'skip_download': True,
1096 },
1097 'expected_warnings': [
1098 'Skipping DASH manifest',
1099 ],
1100 },
c7121fa7
S
1101 {
1102 # The following content has been identified by the YouTube community
1103 # as inappropriate or offensive to some audiences.
1104 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1105 'info_dict': {
1106 'id': '6SJNVb0GnPI',
1107 'ext': 'mp4',
1108 'title': 'Race Differences in Intelligence',
1109 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1110 'duration': 965,
1111 'upload_date': '20140124',
1112 'uploader': 'New Century Foundation',
1113 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1115 },
1116 'params': {
1117 'skip_download': True,
1118 },
545cc85d 1119 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1120 },
022a5d66
S
1121 {
1122 # itag 212
1123 'url': '1t24XAntNCY',
1124 'only_matching': True,
fd5c4aab
S
1125 },
1126 {
1127 # geo restricted to JP
1128 'url': 'sJL6WA-aGkQ',
1129 'only_matching': True,
1130 },
cd5a74a2
S
1131 {
1132 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1133 'only_matching': True,
1134 },
bc2ca1bb 1135 {
1136 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1137 'only_matching': True,
1138 },
1139 {
1140 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1141 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1142 'only_matching': True,
1143 },
825cd268
RA
1144 {
1145 # DRM protected
1146 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1147 'only_matching': True,
4fe54c12
S
1148 },
1149 {
1150 # Video with unsupported adaptive stream type formats
1151 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1152 'info_dict': {
1153 'id': 'Z4Vy8R84T1U',
1154 'ext': 'mp4',
1155 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1156 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1157 'duration': 433,
1158 'upload_date': '20130923',
1159 'uploader': 'Amelia Putri Harwita',
1160 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1161 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1162 'formats': 'maxcount:10',
1163 },
1164 'params': {
1165 'skip_download': True,
1166 'youtube_include_dash_manifest': False,
1167 },
5429d6a9 1168 'skip': 'not actual anymore',
5caabd3c 1169 },
1170 {
822b9d9c 1171 # Youtube Music Auto-generated description
5caabd3c 1172 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1173 'info_dict': {
1174 'id': 'MgNrAu2pzNs',
1175 'ext': 'mp4',
1176 'title': 'Voyeur Girl',
1177 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1178 'upload_date': '20190312',
5429d6a9
S
1179 'uploader': 'Stephen - Topic',
1180 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1181 'artist': 'Stephen',
1182 'track': 'Voyeur Girl',
1183 'album': 'it\'s too much love to know my dear',
1184 'release_date': '20190313',
1185 'release_year': 2019,
1186 },
1187 'params': {
1188 'skip_download': True,
1189 },
1190 },
66b48727
RA
1191 {
1192 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1193 'only_matching': True,
1194 },
011e75e6
S
1195 {
1196 # invalid -> valid video id redirection
1197 'url': 'DJztXj2GPfl',
1198 'info_dict': {
1199 'id': 'DJztXj2GPfk',
1200 'ext': 'mp4',
1201 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1202 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1203 'upload_date': '20090125',
1204 'uploader': 'Prochorowka',
1205 'uploader_id': 'Prochorowka',
1206 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1207 'artist': 'Panjabi MC',
1208 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1209 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1210 },
1211 'params': {
1212 'skip_download': True,
1213 },
545cc85d 1214 'skip': 'Video unavailable',
ea74e00b
DP
1215 },
1216 {
1217 # empty description results in an empty string
1218 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1219 'info_dict': {
1220 'id': 'x41yOUIvK2k',
1221 'ext': 'mp4',
1222 'title': 'IMG 3456',
1223 'description': '',
1224 'upload_date': '20170613',
1225 'uploader_id': 'ElevageOrVert',
1226 'uploader': 'ElevageOrVert',
1227 },
1228 'params': {
1229 'skip_download': True,
1230 },
1231 },
a0566bbf 1232 {
29f7c58a 1233 # with '};' inside yt initial data (see [1])
1234 # see [2] for an example with '};' inside ytInitialPlayerResponse
1235 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1236 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1237 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1238 'info_dict': {
1239 'id': 'CHqg6qOn4no',
1240 'ext': 'mp4',
1241 'title': 'Part 77 Sort a list of simple types in c#',
1242 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1243 'upload_date': '20130831',
1244 'uploader_id': 'kudvenkat',
1245 'uploader': 'kudvenkat',
1246 },
1247 'params': {
1248 'skip_download': True,
1249 },
1250 },
29f7c58a 1251 {
1252 # another example of '};' in ytInitialData
1253 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1254 'only_matching': True,
1255 },
1256 {
1257 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1258 'only_matching': True,
1259 },
545cc85d 1260 {
cc2db878 1261 # https://github.com/ytdl-org/youtube-dl/pull/28094
1262 'url': 'OtqTfy26tG0',
1263 'info_dict': {
1264 'id': 'OtqTfy26tG0',
1265 'ext': 'mp4',
1266 'title': 'Burn Out',
1267 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1268 'upload_date': '20141120',
1269 'uploader': 'The Cinematic Orchestra - Topic',
1270 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1271 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1272 'artist': 'The Cinematic Orchestra',
1273 'track': 'Burn Out',
1274 'album': 'Every Day',
1275 'release_data': None,
1276 'release_year': None,
1277 },
1278 'params': {
1279 'skip_download': True,
1280 },
545cc85d 1281 },
bc2ca1bb 1282 {
1283 # controversial video, only works with bpctr when authenticated with cookies
1284 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1285 'only_matching': True,
1286 },
f7ad7160 1287 {
1288 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1289 'url': 'cBvYw8_A0vQ',
1290 'info_dict': {
1291 'id': 'cBvYw8_A0vQ',
1292 'ext': 'mp4',
1293 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1294 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1295 'upload_date': '20201120',
1296 'uploader': 'Walk around Japan',
1297 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1299 },
1300 'params': {
1301 'skip_download': True,
1302 },
1303 },
2eb88d95
PH
1304 ]
1305
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature shape);
    # populated by _decrypt_signature().
    self._player_cache = {}
    # Downloaded player code, keyed by player id;
    # populated by _extract_signature_function().
    self._code_cache = {}
e0df6211 1310
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    For example 'ab.cde' yields '2.3'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1314
e40c758c
S
1315 @classmethod
1316 def _extract_player_info(cls, player_url):
1317 for player_re in cls._PLAYER_INFO_RE:
1318 id_m = re.search(player_re, player_url)
1319 if id_m:
1320 break
1321 else:
c081b35c 1322 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1323 return id_m.group('id')
e40c758c
S
1324
1325 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1326 player_id = self._extract_player_info(player_url)
e0df6211 1327
c4417ddb 1328 # Read from filesystem cache
545cc85d 1329 func_id = 'js_%s_%s' % (
1330 player_id, self._signature_cache_id(example_sig))
c4417ddb 1331 assert os.path.basename(func_id) == func_id
a0e07d31 1332
69ea8ca4 1333 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1334 if cache_spec is not None:
78caa52a 1335 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1336
545cc85d 1337 if player_id not in self._code_cache:
1338 self._code_cache[player_id] = self._download_webpage(
e0df6211 1339 player_url, video_id,
545cc85d 1340 note='Downloading player ' + player_id,
69ea8ca4 1341 errnote='Download of %s failed' % player_url)
545cc85d 1342 code = self._code_cache[player_id]
1343 res = self._parse_sig_js(code)
e0df6211 1344
785521bf
PH
1345 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1346 cache_res = res(test_string)
1347 cache_spec = [ord(c) for c in cache_res]
83799698 1348
69ea8ca4 1349 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1350 return res
1351
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string as long as *example_sig* whose
    n-th character has code point n, so the result reveals which input
    position ends up at each output position. That permutation is
    rendered as a sum of index/slice expressions on ``s`` and shown via
    self.to_screen(), guarded by a check on the lengths of the
    dot-separated parts of the signature.
    """
    def gen_sig_code(idxs):
        """Yield index/slice expressions on ``s`` that reproduce *idxs*."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A descending run that reaches index 0 must leave the stop
            # open, since a stop of -1 would mean "last element".
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to describe for an empty permutation.
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev; emit it and start over at i.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Use idxs[-1] rather than the loop variable, which is unbound
        # when idxs has a single element and the loop never runs.
        last = idxs[-1]
        if step is None:
            yield 's[%d]' % last
        else:
            yield _genslice(start, last, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = [compat_str(len(p)) for p in example_sig.split('.')]
    # A one-element tuple literal needs a trailing comma: '(83)' is just
    # an int and would never compare equal to the tuple in the guard.
    signature_id_tuple = '(%s%s)' % (
        ', '.join(part_lengths), ',' if len(part_lengths) == 1 else '')
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1390
e0df6211
PH
1391 def _parse_sig_js(self, jscode):
1392 funcname = self._search_regex(
abefc03f
S
1393 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1394 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
cc2db878 1395 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1396 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1397 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
e450f6cb 1398 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
31ce6e99 1399 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
abefc03f
S
1400 # Obsolete patterns
1401 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
9a47fa35 1402 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
abefc03f
S
1403 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1404 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1405 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1406 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1407 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
3c90cc8b 1409 jscode, 'Initial JS player signature function name', group='sig')
2b25cb5d
PH
1410
1411 jsi = JSInterpreter(jscode)
1412 initial_function = jsi.extract_function(funcname)
e0df6211
PH
1413 return lambda s: initial_function([s])
1414
545cc85d 1415 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1416 """Turn the encrypted s field into a working signature"""
6b37f0be 1417
c8bf86d5 1418 if player_url is None:
69ea8ca4 1419 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1420
69ea8ca4 1421 if player_url.startswith('//'):
78caa52a 1422 player_url = 'https:' + player_url
3c90cc8b
S
1423 elif not re.match(r'https?://', player_url):
1424 player_url = compat_urlparse.urljoin(
1425 'https://www.youtube.com', player_url)
c8bf86d5 1426 try:
62af3a0e 1427 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1428 if player_id not in self._player_cache:
1429 func = self._extract_signature_function(
60064c53 1430 video_id, player_url, s
c8bf86d5
PH
1431 )
1432 self._player_cache[player_id] = func
1433 func = self._player_cache[player_id]
1434 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1435 self._print_sig_code(func, s)
c8bf86d5
PH
1436 return func(s)
1437 except Exception as e:
1438 tb = traceback.format_exc()
1439 raise ExtractorError(
78caa52a 1440 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1441
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request is
    non-fatal: a failure does not raise.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1466
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in the HTML of *webpage*.

    The result is a list holding, in this order:
    the URLs of embedded players (iframe/embed/object tags,
    data-video-url attributes and SWFObject calls), the
    data-youtube-id values of lazyYT embeds, and the data-video_id
    values of the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demanded a literal ':' and so never matched a call such as
    # embedSWF("//www.youtube.com/v/..."); it now matches the opening
    # parenthesis followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() yields one tuple per match (both quotes, then the id).
    entries.extend(m[-1] for m in matches)

    return entries
1498
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by _extract_urls(), or None if there is none."""
    for first_found in YoutubeIE._extract_urls(webpage):
        return first_found
    return None
1503
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. the second group of cls._VALID_URL matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1511
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the player bar data in *data*.

    Each chapter is a dict with 'start_time', 'end_time' and 'title'.
    A chapter ends where the following one starts; the last one ends at
    *duration*. Chapters whose start or end time cannot be determined
    are left out. Returns None when *data* holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset is given in milliseconds; convert to seconds.
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = chapter_time(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1551
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON blob matched by *regex* in *webpage* and parse it.

    The pattern is tried first with ``_YT_INITIAL_BOUNDARY_RE`` appended and
    then on its own. When nothing matches, ``'{}'`` is parsed instead.
    Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1556
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated tokens are handed to datetime_from_str as
    'now-<X><units>'. Returns None when time_text is missing/empty or has
    fewer than three tokens; otherwise returns datetime_from_str's result.
    """
    # A comment without published-time text reaches this function as None
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1566
a1c5d2ca
M
1567 @staticmethod
1568 def _join_text_entries(runs):
1569 text = None
1570 for run in runs:
1571 if not isinstance(run, dict):
1572 continue
1573 sub_text = try_get(run, lambda x: x['text'], compat_str)
1574 if sub_text:
1575 if not text:
1576 text = sub_text
1577 continue
1578 text += sub_text
1579 return text
1580
1581 def _extract_comment(self, comment_renderer, parent=None):
1582 comment_id = comment_renderer.get('commentId')
1583 if not comment_id:
1584 return
1585 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1586 text = self._join_text_entries(comment_text_runs) or ''
1587 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1588 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1589 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1590 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1591 author_id = try_get(comment_renderer,
1592 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1593 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1594 lambda x: x['likeCount']), compat_str)) or 0
1595 author_thumbnail = try_get(comment_renderer,
1596 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1597
1598 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1599 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1600 return {
1601 'id': comment_id,
1602 'text': text,
d92f5d5a 1603 'timestamp': timestamp,
a1c5d2ca
M
1604 'time_text': time_text,
1605 'like_count': votes,
1606 'is_favorited': is_liked,
1607 'author': author,
1608 'author_id': author_id,
1609 'author_thumbnail': author_thumbnail,
1610 'author_is_uploader': author_is_uploader,
1611 'parent': parent or 'root'
1612 }
1613
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from *root_continuation_data*.

    Yields comment dicts (as built by _extract_comment), recursing into reply
    threads. On the first top-level page it additionally yields the estimated
    total number of comments as an int, if that number could be parsed.

    session_token_list -- one-element list holding the session token; element
                          0 is replaced whenever a response carries a new
                          'xsrf_token', so recursive calls share the update.
    parent             -- id of the comment whose replies are being fetched;
                          None for the top-level listing.
    comment_counts     -- shared mutable list:
                          [comments so far, estimated total, current thread #].
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type. It must not be bundled into the
            # getter tuple, where try_get would call dict(src) as a fallback
            # getter and hand back a copy of the wrong renderer.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        # Download one page, retrying on transient HTTP errors and on
        # responses that lack 'continuationContents'.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # Convert once and only use the result if it is a number:
                    # '%d' cannot format None, and consumers tell the estimate
                    # apart from comments by it being an int.
                    # NOTE(review): assumes str_to_int returns None for
                    # non-numeric text - confirm against its definition.
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Skip this page's entries; the next request uses the sorted continuation
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No renderer we know how to handle: `continuation` was not
            # advanced, so looping again would only re-request the same page.
            break
1798
1799 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1800 """Entry for comment extraction"""
1801 comments = []
1802 known_entry_comment_renderers = (
1803 'itemSectionRenderer',
1804 )
1805 estimated_total = 0
1806 for entry in contents:
1807 for key, renderer in entry.items():
1808 if key not in known_entry_comment_renderers:
1809 continue
1810
1811 comment_iter = self._comment_entries(
1812 renderer,
1813 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1814 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1815 ytcfg=ytcfg,
a1c5d2ca
M
1816 session_token_list=[xsrf_token])
1817
1818 for comment in comment_iter:
1819 if isinstance(comment, int):
1820 estimated_total = comment
1821 continue
1822 comments.append(comment)
1823 break
d92f5d5a 1824 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1825 return {
1826 'comments': comments,
1827 'comment_count': len(comments),
1828 }
1829
c5e8d7af 1830 def _real_extract(self, url):
cf7e015f 1831 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1832 video_id = self._match_id(url)
1833 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1834 webpage_url = base_url + 'watch?v=' + video_id
1835 webpage = self._download_webpage(
cce889b9 1836 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1837
1838 player_response = None
1839 if webpage:
1840 player_response = self._extract_yt_initial_variable(
1841 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1842 video_id, 'initial player response')
f4f751af 1843
1844 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1845 if not player_response:
1846 player_response = self._call_api(
f4f751af 1847 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1848
1849 playability_status = player_response.get('playabilityStatus') or {}
1850 if playability_status.get('reason') == 'Sign in to confirm your age':
1851 pr = self._parse_json(try_get(compat_parse_qs(
1852 self._download_webpage(
1853 base_url + 'get_video_info', video_id,
1854 'Refetching age-gated info webpage',
1855 'unable to download video info webpage', query={
1856 'video_id': video_id,
7c60c33e 1857 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1858 }, fatal=False)),
1859 lambda x: x['player_response'][0],
1860 compat_str) or '{}', video_id)
1861 if pr:
1862 player_response = pr
1863
1864 trailer_video_id = try_get(
1865 playability_status,
1866 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1867 compat_str)
1868 if trailer_video_id:
1869 return self.url_result(
1870 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1871
545cc85d 1872 def get_text(x):
1873 if not x:
c2d125d9 1874 return
f7ad7160 1875 text = x.get('simpleText')
1876 if text and isinstance(text, compat_str):
1877 return text
1878 runs = x.get('runs')
1879 if not isinstance(runs, list):
1880 return
1881 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1882
545cc85d 1883 search_meta = (
1884 lambda x: self._html_search_meta(x, webpage, default=None)) \
1885 if webpage else lambda x: None
dbdaaa23 1886
545cc85d 1887 video_details = player_response.get('videoDetails') or {}
37357d21 1888 microformat = try_get(
545cc85d 1889 player_response,
1890 lambda x: x['microformat']['playerMicroformatRenderer'],
1891 dict) or {}
1892 video_title = video_details.get('title') \
1893 or get_text(microformat.get('title')) \
1894 or search_meta(['og:title', 'twitter:title', 'title'])
1895 video_description = video_details.get('shortDescription')
cf7e015f 1896
8fe10494 1897 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1898 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1899 multifeed_metadata_list = try_get(
1900 player_response,
1901 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1902 compat_str)
8fe10494
S
1903 if multifeed_metadata_list:
1904 entries = []
1905 feed_ids = []
1906 for feed in multifeed_metadata_list.split(','):
1907 # Unquote should take place before split on comma (,) since textual
1908 # fields may contain comma as well (see
067aa17e 1909 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1910 feed_data = compat_parse_qs(
1911 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1912
1913 def feed_entry(name):
545cc85d 1914 return try_get(
1915 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1916
1917 feed_id = feed_entry('id')
1918 if not feed_id:
1919 continue
1920 feed_title = feed_entry('title')
1921 title = video_title
1922 if feed_title:
1923 title += ' (%s)' % feed_title
8fe10494
S
1924 entries.append({
1925 '_type': 'url_transparent',
1926 'ie_key': 'Youtube',
1927 'url': smuggle_url(
545cc85d 1928 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1929 {'force_singlefeed': True}),
6b09401b 1930 'title': title,
8fe10494 1931 })
6b09401b 1932 feed_ids.append(feed_id)
8fe10494
S
1933 self.to_screen(
1934 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1935 % (', '.join(feed_ids), video_id))
545cc85d 1936 return self.playlist_result(
1937 entries, video_id, video_title, video_description)
8fe10494
S
1938 else:
1939 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1940
545cc85d 1941 formats = []
1942 itags = []
cc2db878 1943 itag_qualities = {}
545cc85d 1944 player_url = None
dca3ff4a 1945 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1946 streaming_data = player_response.get('streamingData') or {}
1947 streaming_formats = streaming_data.get('formats') or []
1948 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1949 for fmt in streaming_formats:
1950 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1951 continue
321bf820 1952
cc2db878 1953 itag = str_or_none(fmt.get('itag'))
1954 quality = fmt.get('quality')
1955 if itag and quality:
1956 itag_qualities[itag] = quality
1957 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1958 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1959 # number of fragment that would subsequently requested with (`&sq=N`)
1960 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1961 continue
1962
545cc85d 1963 fmt_url = fmt.get('url')
1964 if not fmt_url:
1965 sc = compat_parse_qs(fmt.get('signatureCipher'))
1966 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1967 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1968 if not (sc and fmt_url and encrypted_sig):
1969 continue
1970 if not player_url:
1971 if not webpage:
1972 continue
1973 player_url = self._search_regex(
1974 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1975 webpage, 'player URL', fatal=False)
1976 if not player_url:
201e9eaa 1977 continue
545cc85d 1978 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1979 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1980 fmt_url += '&' + sp + '=' + signature
1981
545cc85d 1982 if itag:
1983 itags.append(itag)
cc2db878 1984 tbr = float_or_none(
1985 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1986 dct = {
1987 'asr': int_or_none(fmt.get('audioSampleRate')),
1988 'filesize': int_or_none(fmt.get('contentLength')),
1989 'format_id': itag,
1990 'format_note': fmt.get('qualityLabel') or quality,
1991 'fps': int_or_none(fmt.get('fps')),
1992 'height': int_or_none(fmt.get('height')),
dca3ff4a 1993 'quality': q(quality),
cc2db878 1994 'tbr': tbr,
545cc85d 1995 'url': fmt_url,
1996 'width': fmt.get('width'),
1997 }
1998 mimetype = fmt.get('mimeType')
1999 if mimetype:
2000 mobj = re.match(
2001 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2002 if mobj:
2003 dct['ext'] = mimetype2ext(mobj.group(1))
2004 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2005 no_audio = dct.get('acodec') == 'none'
2006 no_video = dct.get('vcodec') == 'none'
2007 if no_audio:
2008 dct['vbr'] = tbr
2009 if no_video:
2010 dct['abr'] = tbr
2011 if no_audio or no_video:
545cc85d 2012 dct['downloader_options'] = {
2013 # Youtube throttles chunks >~10M
2014 'http_chunk_size': 10485760,
bf1317d2 2015 }
7c60c33e 2016 if dct.get('ext'):
2017 dct['container'] = dct['ext'] + '_dash'
545cc85d 2018 formats.append(dct)
2019
2020 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2021 if hls_manifest_url:
2022 for f in self._extract_m3u8_formats(
2023 hls_manifest_url, video_id, 'mp4', fatal=False):
2024 itag = self._search_regex(
2025 r'/itag/(\d+)', f['url'], 'itag', default=None)
2026 if itag:
2027 f['format_id'] = itag
2028 formats.append(f)
2029
1418a043 2030 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2031 dash_manifest_url = streaming_data.get('dashManifestUrl')
2032 if dash_manifest_url:
545cc85d 2033 for f in self._extract_mpd_formats(
2034 dash_manifest_url, video_id, fatal=False):
cc2db878 2035 itag = f['format_id']
2036 if itag in itags:
2037 continue
dca3ff4a 2038 if itag in itag_qualities:
2039 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2040 # but kept to maintain feature parity (and code similarity) with youtube-dl
2041 # Remove if this causes any issues with sorting in future
2042 f['quality'] = q(itag_qualities[itag])
545cc85d 2043 filesize = int_or_none(self._search_regex(
2044 r'/clen/(\d+)', f.get('fragment_base_url')
2045 or f['url'], 'file size', default=None))
2046 if filesize:
2047 f['filesize'] = filesize
cc2db878 2048 formats.append(f)
bf1317d2 2049
545cc85d 2050 if not formats:
63ad4d43 2051 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 2052 raise ExtractorError(
2053 'This video is DRM protected.', expected=True)
2054 pemr = try_get(
2055 playability_status,
2056 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2057 dict) or {}
2058 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2059 subreason = pemr.get('subreason')
2060 if subreason:
2061 subreason = clean_html(get_text(subreason))
2062 if subreason == 'The uploader has not made this video available in your country.':
2063 countries = microformat.get('availableCountries')
2064 if not countries:
2065 regions_allowed = search_meta('regionsAllowed')
2066 countries = regions_allowed.split(',') if regions_allowed else None
2067 self.raise_geo_restricted(
2068 subreason, countries)
2069 reason += '\n' + subreason
2070 if reason:
2071 raise ExtractorError(reason, expected=True)
bf1317d2 2072
545cc85d 2073 self._sort_formats(formats)
bf1317d2 2074
545cc85d 2075 keywords = video_details.get('keywords') or []
2076 if not keywords and webpage:
2077 keywords = [
2078 unescapeHTML(m.group('content'))
2079 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2080 for keyword in keywords:
2081 if keyword.startswith('yt:stretch='):
46fff710 2082 stretch_ratio = map(
2083 lambda x: int_or_none(x, default=0),
2084 keyword.split('=')[1].split(':'))
2085 w, h = (list(stretch_ratio) + [0])[:2]
545cc85d 2086 if w > 0 and h > 0:
2087 ratio = w / h
2088 for f in formats:
2089 if f.get('vcodec') != 'none':
2090 f['stretched_ratio'] = ratio
6449cd80 2091
545cc85d 2092 thumbnails = []
2093 for container in (video_details, microformat):
2094 for thumbnail in (try_get(
2095 container,
2096 lambda x: x['thumbnail']['thumbnails'], list) or []):
2097 thumbnail_url = thumbnail.get('url')
2098 if not thumbnail_url:
bf1317d2 2099 continue
1988fab7 2100 # Sometimes youtube gives a wrong thumbnail URL. See:
2101 # https://github.com/yt-dlp/yt-dlp/issues/233
2102 # https://github.com/ytdl-org/youtube-dl/issues/28023
2103 if 'maxresdefault' in thumbnail_url:
2104 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2105 thumbnails.append({
2106 'height': int_or_none(thumbnail.get('height')),
2107 'url': thumbnail_url,
2108 'width': int_or_none(thumbnail.get('width')),
2109 })
2110 if thumbnails:
2111 break
a6211d23 2112 else:
545cc85d 2113 thumbnail = search_meta(['og:image', 'twitter:image'])
2114 if thumbnail:
2115 thumbnails = [{'url': thumbnail}]
2116
2117 category = microformat.get('category') or search_meta('genre')
2118 channel_id = video_details.get('channelId') \
2119 or microformat.get('externalChannelId') \
2120 or search_meta('channelId')
2121 duration = int_or_none(
2122 video_details.get('lengthSeconds')
2123 or microformat.get('lengthSeconds')) \
2124 or parse_duration(search_meta('duration'))
2125 is_live = video_details.get('isLive')
2126 owner_profile_url = microformat.get('ownerProfileUrl')
2127
2128 info = {
2129 'id': video_id,
2130 'title': self._live_title(video_title) if is_live else video_title,
2131 'formats': formats,
2132 'thumbnails': thumbnails,
2133 'description': video_description,
2134 'upload_date': unified_strdate(
2135 microformat.get('uploadDate')
2136 or search_meta('uploadDate')),
2137 'uploader': video_details['author'],
2138 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2139 'uploader_url': owner_profile_url,
2140 'channel_id': channel_id,
2141 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2142 'duration': duration,
2143 'view_count': int_or_none(
2144 video_details.get('viewCount')
2145 or microformat.get('viewCount')
2146 or search_meta('interactionCount')),
2147 'average_rating': float_or_none(video_details.get('averageRating')),
2148 'age_limit': 18 if (
2149 microformat.get('isFamilySafe') is False
2150 or search_meta('isFamilyFriendly') == 'false'
2151 or search_meta('og:restrictions:age') == '18+') else 0,
2152 'webpage_url': webpage_url,
2153 'categories': [category] if category else None,
2154 'tags': keywords,
2155 'is_live': is_live,
2156 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2157 'was_live': video_details.get('isLiveContent'),
545cc85d 2158 }
b477fc13 2159
545cc85d 2160 pctr = try_get(
2161 player_response,
2162 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2163 subtitles = {}
2164 if pctr:
2165 def process_language(container, base_url, lang_code, query):
2166 lang_subs = []
2167 for fmt in self._SUBTITLE_FORMATS:
2168 query.update({
2169 'fmt': fmt,
2170 })
2171 lang_subs.append({
2172 'ext': fmt,
2173 'url': update_url_query(base_url, query),
2174 })
2175 container[lang_code] = lang_subs
7e72694b 2176
545cc85d 2177 for caption_track in (pctr.get('captionTracks') or []):
2178 base_url = caption_track.get('baseUrl')
2179 if not base_url:
2180 continue
2181 if caption_track.get('kind') != 'asr':
2182 lang_code = caption_track.get('languageCode')
2183 if not lang_code:
2184 continue
2185 process_language(
2186 subtitles, base_url, lang_code, {})
2187 continue
2188 automatic_captions = {}
2189 for translation_language in (pctr.get('translationLanguages') or []):
2190 translation_language_code = translation_language.get('languageCode')
2191 if not translation_language_code:
2192 continue
2193 process_language(
2194 automatic_captions, base_url, translation_language_code,
2195 {'tlang': translation_language_code})
2196 info['automatic_captions'] = automatic_captions
2197 info['subtitles'] = subtitles
7e72694b 2198
545cc85d 2199 parsed_url = compat_urllib_parse_urlparse(url)
2200 for component in [parsed_url.fragment, parsed_url.query]:
2201 query = compat_parse_qs(component)
2202 for k, v in query.items():
2203 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2204 d_k += '_time'
2205 if d_k not in info and k in s_ks:
2206 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2207
2208 # Youtube Music Auto-generated description
822b9d9c 2209 if video_description:
38d70284 2210 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2211 if mobj:
822b9d9c
RA
2212 release_year = mobj.group('release_year')
2213 release_date = mobj.group('release_date')
2214 if release_date:
2215 release_date = release_date.replace('-', '')
2216 if not release_year:
545cc85d 2217 release_year = release_date[:4]
2218 info.update({
2219 'album': mobj.group('album'.strip()),
2220 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2221 'track': mobj.group('track').strip(),
2222 'release_date': release_date,
cc2db878 2223 'release_year': int_or_none(release_year),
545cc85d 2224 })
7e72694b 2225
545cc85d 2226 initial_data = None
2227 if webpage:
2228 initial_data = self._extract_yt_initial_variable(
2229 webpage, self._YT_INITIAL_DATA_RE, video_id,
2230 'yt initial data')
2231 if not initial_data:
2232 initial_data = self._call_api(
f4f751af 2233 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2234
2235 if not is_live:
2236 try:
2237 # This will error if there is no livechat
2238 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2239 info['subtitles']['live_chat'] = [{
394dcd44 2240 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2241 'video_id': video_id,
2242 'ext': 'json',
2243 'protocol': 'youtube_live_chat_replay',
2244 }]
2245 except (KeyError, IndexError, TypeError):
2246 pass
2247
2248 if initial_data:
2249 chapters = self._extract_chapters_from_json(
2250 initial_data, video_id, duration)
2251 if not chapters:
2252 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2253 contents = try_get(
2254 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2255 list)
2256 if not contents:
2257 continue
2258
2259 def chapter_time(mmlir):
2260 return parse_duration(
2261 get_text(mmlir.get('timeDescription')))
2262
2263 chapters = []
2264 for next_num, content in enumerate(contents, start=1):
2265 mmlir = content.get('macroMarkersListItemRenderer') or {}
2266 start_time = chapter_time(mmlir)
2267 end_time = chapter_time(try_get(
2268 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2269 if next_num < len(contents) else duration
2270 if start_time is None or end_time is None:
2271 continue
2272 chapters.append({
2273 'start_time': start_time,
2274 'end_time': end_time,
2275 'title': get_text(mmlir.get('title')),
2276 })
2277 if chapters:
2278 break
2279 if chapters:
2280 info['chapters'] = chapters
2281
2282 contents = try_get(
2283 initial_data,
2284 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2285 list) or []
2286 for content in contents:
2287 vpir = content.get('videoPrimaryInfoRenderer')
2288 if vpir:
2289 stl = vpir.get('superTitleLink')
2290 if stl:
2291 stl = get_text(stl)
2292 if try_get(
2293 vpir,
2294 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2295 info['location'] = stl
2296 else:
2297 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2298 if mobj:
2299 info.update({
2300 'series': mobj.group(1),
2301 'season_number': int(mobj.group(2)),
2302 'episode_number': int(mobj.group(3)),
2303 })
2304 for tlb in (try_get(
2305 vpir,
2306 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2307 list) or []):
2308 tbr = tlb.get('toggleButtonRenderer') or {}
2309 for getter, regex in [(
2310 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2311 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2312 lambda x: x['accessibility'],
2313 lambda x: x['accessibilityData']['accessibilityData'],
2314 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2315 label = (try_get(tbr, getter, dict) or {}).get('label')
2316 if label:
2317 mobj = re.match(regex, label)
2318 if mobj:
2319 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2320 break
2321 sbr_tooltip = try_get(
2322 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2323 if sbr_tooltip:
2324 like_count, dislike_count = sbr_tooltip.split(' / ')
2325 info.update({
2326 'like_count': str_to_int(like_count),
2327 'dislike_count': str_to_int(dislike_count),
2328 })
2329 vsir = content.get('videoSecondaryInfoRenderer')
2330 if vsir:
2331 info['channel'] = get_text(try_get(
2332 vsir,
2333 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2334 dict))
545cc85d 2335 rows = try_get(
2336 vsir,
2337 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2338 list) or []
2339 multiple_songs = False
2340 for row in rows:
2341 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2342 multiple_songs = True
2343 break
2344 for row in rows:
2345 mrr = row.get('metadataRowRenderer') or {}
2346 mrr_title = mrr.get('title')
2347 if not mrr_title:
2348 continue
2349 mrr_title = get_text(mrr['title'])
2350 mrr_contents_text = get_text(mrr['contents'][0])
2351 if mrr_title == 'License':
2352 info['license'] = mrr_contents_text
2353 elif not multiple_songs:
2354 if mrr_title == 'Album':
2355 info['album'] = mrr_contents_text
2356 elif mrr_title == 'Artist':
2357 info['artist'] = mrr_contents_text
2358 elif mrr_title == 'Song':
2359 info['track'] = mrr_contents_text
2360
2361 fallbacks = {
2362 'channel': 'uploader',
2363 'channel_id': 'uploader_id',
2364 'channel_url': 'uploader_url',
2365 }
2366 for to, frm in fallbacks.items():
2367 if not info.get(to):
2368 info[to] = info.get(frm)
2369
2370 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2371 v = info.get(s_k)
2372 if v:
2373 info[d_k] = v
b84071c0 2374
c224251a
M
2375 is_private = bool_or_none(video_details.get('isPrivate'))
2376 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2377 is_membersonly = None
b28f8d24 2378 is_premium = None
c224251a
M
2379 if initial_data and is_private is not None:
2380 is_membersonly = False
b28f8d24 2381 is_premium = False
c224251a
M
2382 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2383 for content in contents or []:
2384 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2385 for badge in badges or []:
2386 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2387 if label.lower() == 'members only':
2388 is_membersonly = True
2389 break
b28f8d24
M
2390 elif label.lower() == 'premium':
2391 is_premium = True
2392 break
2393 if is_membersonly or is_premium:
c224251a
M
2394 break
2395
2396 # TODO: Add this for playlists
2397 info['availability'] = self._availability(
2398 is_private=is_private,
b28f8d24 2399 needs_premium=is_premium,
c224251a
M
2400 needs_subscription=is_membersonly,
2401 needs_auth=info['age_limit'] >= 18,
2402 is_unlisted=None if is_private is None else is_unlisted)
2403
06167fbb 2404 # get xsrf for annotations or comments
2405 get_annotations = self._downloader.params.get('writeannotations', False)
2406 get_comments = self._downloader.params.get('getcomments', False)
2407 if get_annotations or get_comments:
29f7c58a 2408 xsrf_token = None
545cc85d 2409 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2410 if ytcfg:
2411 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2412 if not xsrf_token:
2413 xsrf_token = self._search_regex(
2414 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2415 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2416
2417 # annotations
06167fbb 2418 if get_annotations:
64b6a4e9
RA
2419 invideo_url = try_get(
2420 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2421 if xsrf_token and invideo_url:
29f7c58a 2422 xsrf_field_name = None
2423 if ytcfg:
2424 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2425 if not xsrf_field_name:
2426 xsrf_field_name = self._search_regex(
2427 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2428 webpage, 'xsrf field name',
29f7c58a 2429 group='xsrf_field_name', default='session_token')
8a784c74 2430 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2431 self._proto_relative_url(invideo_url),
2432 video_id, note='Downloading annotations',
2433 errnote='Unable to download video annotations', fatal=False,
2434 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2435
277d6ff5 2436 if get_comments:
a1c5d2ca 2437 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2438
545cc85d 2439 self.mark_watched(video_id, player_response)
d77ab8e2 2440
545cc85d 2441 return info
c5e8d7af 2442
5f6a1245 2443
8bdd16b4 2444class YoutubeTabIE(YoutubeBaseInfoExtractor):
2445 IE_DESC = 'YouTube.com tab'
70d5c17b 2446 _VALID_URL = r'''(?x)
2447 https?://
2448 (?:\w+\.)?
2449 (?:
2450 youtube(?:kids)?\.com|
2451 invidio\.us
2452 )/
2453 (?:
2454 (?:channel|c|user)/|
2455 (?P<not_channel>
9ba5705a 2456 feed/|hashtag/|
70d5c17b 2457 (?:playlist|watch)\?.*?\blist=
2458 )|
29f7c58a 2459 (?!(?:%s)\b) # Direct URLs
70d5c17b 2460 )
2461 (?P<id>[^/?\#&]+)
2462 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2463 IE_NAME = 'youtube:tab'
2464
81127aa5 2465 _TESTS = [{
8bdd16b4 2466 # playlists, multipage
2467 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2468 'playlist_mincount': 94,
2469 'info_dict': {
2470 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2471 'title': 'Игорь Клейнер - Playlists',
2472 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2473 'uploader': 'Игорь Клейнер',
2474 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2475 },
2476 }, {
2477 # playlists, multipage, different order
2478 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2479 'playlist_mincount': 94,
2480 'info_dict': {
2481 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2482 'title': 'Игорь Клейнер - Playlists',
2483 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2484 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2485 'uploader': 'Игорь Клейнер',
8bdd16b4 2486 },
2487 }, {
2488 # playlists, singlepage
2489 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2490 'playlist_mincount': 4,
2491 'info_dict': {
2492 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2493 'title': 'ThirstForScience - Playlists',
2494 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2495 'uploader': 'ThirstForScience',
2496 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2497 }
2498 }, {
2499 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2500 'only_matching': True,
2501 }, {
2502 # basic, single video playlist
0e30a7b9 2503 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2504 'info_dict': {
0e30a7b9 2505 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2506 'uploader': 'Sergey M.',
2507 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2508 'title': 'youtube-dl public playlist',
81127aa5 2509 },
0e30a7b9 2510 'playlist_count': 1,
9291475f 2511 }, {
8bdd16b4 2512 # empty playlist
0e30a7b9 2513 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2514 'info_dict': {
0e30a7b9 2515 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2516 'uploader': 'Sergey M.',
2517 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2518 'title': 'youtube-dl empty playlist',
9291475f
PH
2519 },
2520 'playlist_count': 0,
2521 }, {
8bdd16b4 2522 # Home tab
2523 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2524 'info_dict': {
8bdd16b4 2525 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2526 'title': 'lex will - Home',
2527 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2528 'uploader': 'lex will',
2529 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2530 },
8bdd16b4 2531 'playlist_mincount': 2,
9291475f 2532 }, {
8bdd16b4 2533 # Videos tab
2534 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2535 'info_dict': {
8bdd16b4 2536 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2537 'title': 'lex will - Videos',
2538 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2539 'uploader': 'lex will',
2540 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2541 },
8bdd16b4 2542 'playlist_mincount': 975,
9291475f 2543 }, {
8bdd16b4 2544 # Videos tab, sorted by popular
2545 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2546 'info_dict': {
8bdd16b4 2547 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2548 'title': 'lex will - Videos',
2549 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2550 'uploader': 'lex will',
2551 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2552 },
8bdd16b4 2553 'playlist_mincount': 199,
9291475f 2554 }, {
8bdd16b4 2555 # Playlists tab
2556 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2557 'info_dict': {
8bdd16b4 2558 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2559 'title': 'lex will - Playlists',
2560 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2561 'uploader': 'lex will',
2562 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2563 },
8bdd16b4 2564 'playlist_mincount': 17,
ac7553d0 2565 }, {
8bdd16b4 2566 # Community tab
2567 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2568 'info_dict': {
8bdd16b4 2569 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2570 'title': 'lex will - Community',
2571 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2572 'uploader': 'lex will',
2573 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2574 },
2575 'playlist_mincount': 18,
87dadd45 2576 }, {
8bdd16b4 2577 # Channels tab
2578 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2579 'info_dict': {
8bdd16b4 2580 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2581 'title': 'lex will - Channels',
2582 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2583 'uploader': 'lex will',
2584 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2585 },
deaec5af 2586 'playlist_mincount': 12,
6b08cdf6 2587 }, {
a0566bbf 2588 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2589 'only_matching': True,
2590 }, {
a0566bbf 2591 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2592 'only_matching': True,
2593 }, {
a0566bbf 2594 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2595 'only_matching': True,
2596 }, {
2597 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2598 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2599 'info_dict': {
2600 'title': '29C3: Not my department',
2601 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2602 'uploader': 'Christiaan008',
2603 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2604 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2605 },
2606 'playlist_count': 96,
2607 }, {
2608 'note': 'Large playlist',
2609 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2610 'info_dict': {
8bdd16b4 2611 'title': 'Uploads from Cauchemar',
2612 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2613 'uploader': 'Cauchemar',
2614 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2615 },
8bdd16b4 2616 'playlist_mincount': 1123,
2617 }, {
2618 # even larger playlist, 8832 videos
2619 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2620 'only_matching': True,
4b7df0d3
JMF
2621 }, {
2622 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2623 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2624 'info_dict': {
acf757f4
PH
2625 'title': 'Uploads from Interstellar Movie',
2626 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2627 'uploader': 'Interstellar Movie',
8bdd16b4 2628 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2629 },
481cc733 2630 'playlist_mincount': 21,
8bdd16b4 2631 }, {
2632 # https://github.com/ytdl-org/youtube-dl/issues/21844
2633 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2634 'info_dict': {
2635 'title': 'Data Analysis with Dr Mike Pound',
2636 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2637 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2638 'uploader': 'Computerphile',
deaec5af 2639 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2640 },
2641 'playlist_mincount': 11,
2642 }, {
a0566bbf 2643 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2644 'only_matching': True,
dacb3a86
S
2645 }, {
2646 # Playlist URL that does not actually serve a playlist
2647 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2648 'info_dict': {
2649 'id': 'FqZTN594JQw',
2650 'ext': 'webm',
2651 'title': "Smiley's People 01 detective, Adventure Series, Action",
2652 'uploader': 'STREEM',
2653 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2655 'upload_date': '20150526',
2656 'license': 'Standard YouTube License',
2657 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2658 'categories': ['People & Blogs'],
2659 'tags': list,
dbdaaa23 2660 'view_count': int,
dacb3a86
S
2661 'like_count': int,
2662 'dislike_count': int,
2663 },
2664 'params': {
2665 'skip_download': True,
2666 },
13a75688 2667 'skip': 'This video is not available.',
dacb3a86 2668 'add_ie': [YoutubeIE.ie_key()],
481cc733 2669 }, {
8bdd16b4 2670 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2671 'only_matching': True,
66b48727 2672 }, {
8bdd16b4 2673 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2674 'only_matching': True,
a0566bbf 2675 }, {
2676 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2677 'info_dict': {
2678 'id': '9Auq9mYxFEE',
2679 'ext': 'mp4',
deaec5af 2680 'title': compat_str,
a0566bbf 2681 'uploader': 'Sky News',
2682 'uploader_id': 'skynews',
2683 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2684 'upload_date': '20191102',
deaec5af 2685 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2686 'categories': ['News & Politics'],
2687 'tags': list,
2688 'like_count': int,
2689 'dislike_count': int,
2690 },
2691 'params': {
2692 'skip_download': True,
2693 },
2694 }, {
2695 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2696 'info_dict': {
2697 'id': 'a48o2S1cPoo',
2698 'ext': 'mp4',
2699 'title': 'The Young Turks - Live Main Show',
2700 'uploader': 'The Young Turks',
2701 'uploader_id': 'TheYoungTurks',
2702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2703 'upload_date': '20150715',
2704 'license': 'Standard YouTube License',
2705 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2706 'categories': ['News & Politics'],
2707 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2708 'like_count': int,
2709 'dislike_count': int,
2710 },
2711 'params': {
2712 'skip_download': True,
2713 },
2714 'only_matching': True,
2715 }, {
2716 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2717 'only_matching': True,
2718 }, {
2719 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2720 'only_matching': True,
3d3dddc9 2721 }, {
2722 'url': 'https://www.youtube.com/feed/trending',
2723 'only_matching': True,
2724 }, {
2725 # needs auth
2726 'url': 'https://www.youtube.com/feed/library',
2727 'only_matching': True,
2728 }, {
2729 # needs auth
2730 'url': 'https://www.youtube.com/feed/history',
2731 'only_matching': True,
2732 }, {
2733 # needs auth
2734 'url': 'https://www.youtube.com/feed/subscriptions',
2735 'only_matching': True,
2736 }, {
2737 # needs auth
2738 'url': 'https://www.youtube.com/feed/watch_later',
2739 'only_matching': True,
2740 }, {
2741 # no longer available?
2742 'url': 'https://www.youtube.com/feed/recommended',
2743 'only_matching': True,
29f7c58a 2744 }, {
2745 # inline playlist with not always working continuations
2746 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2747 'only_matching': True,
2748 }, {
2749 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2750 'only_matching': True,
2751 }, {
2752 'url': 'https://www.youtube.com/course',
2753 'only_matching': True,
2754 }, {
2755 'url': 'https://www.youtube.com/zsecurity',
2756 'only_matching': True,
2757 }, {
2758 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2759 'only_matching': True,
2760 }, {
2761 'url': 'https://www.youtube.com/TheYoungTurks/live',
2762 'only_matching': True,
39ed931e 2763 }, {
2764 'url': 'https://www.youtube.com/hashtag/cctv9',
2765 'info_dict': {
2766 'id': 'cctv9',
2767 'title': '#cctv9',
2768 },
2769 'playlist_mincount': 350,
29f7c58a 2770 }]
2771
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that ``YoutubeIE`` accepts are always rejected here; every other
    URL is decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2776
def _extract_channel_id(self, webpage):
    """Return the channel id advertised by *webpage*.

    The ``channelId`` meta tag is tried first. When it is absent, the id is
    taken from the ``/channel/<id>`` component of one of the page's URL
    meta tags (``og:url``, ``al:*:url``, ``twitter:*``).
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to live inside the capturing group: with
    # ``([^/?#&])+`` the group is overwritten on every repetition and only
    # the last character of the id would be returned.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2789
8bdd16b4 2790 @staticmethod
cd7c66cf 2791 def _extract_basic_item_renderer(item):
2792 # Modified from _extract_grid_item_renderer
2793 known_renderers = (
e3c07697 2794 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2795 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2796 )
2797 for key, renderer in item.items():
2798 if key not in known_renderers:
2799 continue
2800 return renderer
8bdd16b4 2801
8bdd16b4 2802 def _grid_entries(self, grid_renderer):
2803 for item in grid_renderer['items']:
2804 if not isinstance(item, dict):
39b62db1 2805 continue
cd7c66cf 2806 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2807 if not isinstance(renderer, dict):
2808 continue
2809 title = try_get(
2810 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2811 # playlist
2812 playlist_id = renderer.get('playlistId')
2813 if playlist_id:
2814 yield self.url_result(
2815 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2816 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2817 video_title=title)
2818 # video
2819 video_id = renderer.get('videoId')
2820 if video_id:
2821 yield self._extract_video(renderer)
2822 # channel
2823 channel_id = renderer.get('channelId')
2824 if channel_id:
2825 title = try_get(
2826 renderer, lambda x: x['title']['simpleText'], compat_str)
2827 yield self.url_result(
2828 'https://www.youtube.com/channel/%s' % channel_id,
2829 ie=YoutubeTabIE.ie_key(), video_title=title)
2830
3d3dddc9 2831 def _shelf_entries_from_content(self, shelf_renderer):
2832 content = shelf_renderer.get('content')
2833 if not isinstance(content, dict):
8bdd16b4 2834 return
cd7c66cf 2835 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2836 if renderer:
2837 # TODO: add support for nested playlists so each shelf is processed
2838 # as separate playlist
2839 # TODO: this includes only first N items
2840 for entry in self._grid_entries(renderer):
2841 yield entry
2842 renderer = content.get('horizontalListRenderer')
2843 if renderer:
2844 # TODO
2845 pass
8bdd16b4 2846
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf endpoint has a URL, a result for that URL is yielded
    first, unless *skip_channels* is set and the URL contains
    ``/channels?``, in which case nothing at all is yielded. The entries
    found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2864
8bdd16b4 2865 def _playlist_entries(self, video_list_renderer):
2866 for content in video_list_renderer['contents']:
2867 if not isinstance(content, dict):
2868 continue
2869 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2870 if not isinstance(renderer, dict):
2871 continue
2872 video_id = renderer.get('videoId')
2873 if not video_id:
2874 continue
2875 yield self._extract_video(renderer)
07aeced6 2876
def _rich_entries(self, rich_grid_renderer):
    """Yield the video of a rich item, if it has one.

    The video renderer is read from ``['content']['videoRenderer']``;
    nothing is yielded when it is missing or has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2884
8bdd16b4 2885 def _video_entry(self, video_renderer):
2886 video_id = video_renderer.get('videoId')
2887 if video_id:
2888 return self._extract_video(video_renderer)
dacb3a86 2889
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The video attached to the post (if any) is yielded first, followed by a
    URL result for every inline link in the post text that ``YoutubeIE``
    can handle. A link pointing at the attached video is skipped so the
    same video is not reported twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        # Remember the attached video's id; it is what the duplicate check
        # on the inline links below compares against.
        video_id = video_renderer.get('videoId')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2918
8bdd16b4 2919 def _post_thread_continuation_entries(self, post_thread_continuation):
2920 contents = post_thread_continuation.get('contents')
2921 if not isinstance(contents, list):
2922 return
2923 for content in contents:
2924 renderer = content.get('backstagePostThreadRenderer')
2925 if not isinstance(renderer, dict):
2926 continue
2927 for entry in self._post_thread_entries(renderer):
2928 yield entry
07aeced6 2929
39ed931e 2930 r''' # unused
2931 def _rich_grid_entries(self, contents):
2932 for content in contents:
2933 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2934 if video_renderer:
2935 entry = self._video_entry(video_renderer)
2936 if entry:
2937 yield entry
2938 '''
2939
29f7c58a 2940 @staticmethod
2941 def _build_continuation_query(continuation, ctp=None):
2942 query = {
2943 'ctoken': continuation,
2944 'continuation': continuation,
2945 }
2946 if ctp:
2947 query['itct'] = ctp
2948 return query
2949
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that data, or its ``continuation`` token, is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2961
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is preferred. Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned, in that order, for the
    first ``continuationItemRenderer`` that carries a continuation token.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 2984
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. While a
    continuation is available, the next page is requested through the
    ``browse`` API endpoint and its entries are yielded as well.

    A page request is retried up to the ``extractor_retries`` parameter
    (default 3) on HTTP 500/503/404 and on responses that contain none of
    the expected continuation keys. When the retries are exhausted on an
    incomplete response, the error is reported through the downloader.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries, which stores the
    # continuation it finds there (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            # 'itct' is only present when the continuation carried click
            # tracking params, so it must not be indexed unconditionally
            query = {'continuation': continuation['continuation']}
            click_tracking_params = continuation.get('itct')
            if click_tracking_params:
                query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
            try:
                response = self._call_api(
                    ep='browse', fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    context=context,
                    api_key=self._extract_api_key(ytcfg),
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot so extract_entries can report this page's continuation
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the items under the key the chosen handler reads from
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3135
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 3143
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar of *data*.

    Returns a dict holding whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be found; keys whose value is None are left
    out. The dict is empty when there is no sidebar or no owner in it.
    """
    uploader = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        uploader['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return {field: value for field, value in uploader.items() if value is not None}
8bdd16b4 3166
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata comes from ``channelMetadataRenderer`` when present and from
    ``playlistMetadataRenderer`` otherwise.  When neither provides a title
    the hashtag header text, and finally the playlist id, is used.  The
    entries are produced lazily by ``self._entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    # Channel pages expose channel level metadata, other pages only
    # playlist level metadata
    channel_name = channel_url = channel_id = None
    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        raw_thumbnails = try_get(meta, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # Fall back to the thumbnail shown in the playlist sidebar
            raw_thumbnails = try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader fields were settled on
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3238
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a mix playlist, fetching follow-up pages.

    After each page the watch page of the last yielded video is downloaded
    to obtain the next part of the mix.  Iteration stops when a page has no
    videos, has nothing after the previously seen last video, or returns
    to the very first video.
    """
    first_id = last_id = None
    page_num = 0
    while True:
        page_num += 1
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return

        # Resume right after the last video of the previous page, if present
        start = 0
        for index, entry in enumerate(videos):
            if entry['id'] == last_id:
                start = index + 1
                break
        if start >= len(videos):
            return

        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video

        if not first_id:
            first_id = videos[0]['id']
        last_id = videos[-1]['id']

        next_url = 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id)
        _, data = self._extract_webpage(
            next_url, '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3261
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Handle a playlist embedded in a watch page.

    When the playlist advertises an endpoint URL different from ``url`` the
    work is handed over to YoutubeTabIE through a url_result; otherwise the
    playlist is extracted here as a mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3279
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data`` and raise on errors.

    Each alert with a type and a non-empty message is classified by its
    type: ``error`` (case-insensitive) or anything else.  All non-error
    alerts and every error but the last are reported as warnings; the last
    error is raised as ExtractorError.

    ``expected`` is forwarded to ExtractorError.
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Ignore malformed renderers rather than crashing on .get()
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without usable text must not break concatenation
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                # Yield every alert once only, so that an error is not
                # both warned about and raised
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # The last error is raised below, everything else is only a warning
    for alert_type, alert_message in (warnings + errors[:-1]):
        self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3310
def _extract_webpage(self, url, item_id):
    """Download ``url`` and parse its initial data, retrying if incomplete.

    The number of retries is taken from the ``extractor_retries``
    parameter (default 3).  Alerts found in the data are processed on every
    attempt.  Returns the tuple ``(webpage, data)`` of the last attempt.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
            note = 'Downloading webpage (retry #%d)' % attempt
        else:
            note = 'Downloading webpage'
        webpage = self._download_webpage(url, item_id, note)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Only reached for incomplete data; complain once retries ran out
        if attempt >= max_retries:
            self._downloader.report_error(last_error)
    return webpage, data
3331
def _real_extract(self, url):
    """Extract a channel tab, playlist or mix from ``url``.

    The host is normalised to www.youtube.com.  Bare channel/user URLs are
    redirected to their ``/videos`` tab and watch URLs without a video id
    to the playlist page.  With both a video and a playlist id the
    ``noplaylist`` parameter decides which one is downloaded.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    channel_match = re.match(
        r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    mobj = channel_match.groupdict() if channel_match else {}
    if mobj and not mobj.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (mobj.get('not_channel') or '').startswith('watch')
    if not video_id and is_watch_url:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # Channel, playlist and feed pages are laid out in tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # Watch pages carry the playlist next to the player
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if video_id:
        self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3385
c5e8d7af 3386
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and ``list=`` URLs not claimed by YoutubeTabIE.

    The match is turned into a https://www.youtube.com/playlist URL and
    passed on to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # YoutubeTabIE has priority for every URL it is able to handle
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Keep the original query; a bare id has none, so build one
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3461
3462
class YoutubeYtBeIE(InfoExtractor):
    """Match youtu.be short links that also carry a ``list=`` parameter.

    The link is rewritten to the equivalent www.youtube.com watch URL and
    passed on to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3501
3502
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` keyword into the user page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        # The user page itself is processed by YoutubeTabIE
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3516
b05654f0 3517
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ``:ytfav`` keyword into the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3535
3536
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor for the ``ytsearch`` keyword, backed by the search API."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # There doesn't appear to be a real limit; a search for 'python', for
    # example, gives more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` videos for ``query``, following continuations."""
        api_query = {'query': query}
        if self._SEARCH_PARAMS:
            api_query['params'] = self._SEARCH_PARAMS

        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            search = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=api_query)
            if not search:
                return

            # The first page and continuation pages use different layouts
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                return
            api_query['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3605
c9ae7b95 3606
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the "params" value of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3612
c9ae7b95 3613
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a youtube.com/results URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern above as it is
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # The "sp" value of the URL becomes the search params of the request
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3639
3640
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Subclasses have to provide the _FEED_NAME property, which determines
    both IE_NAME and the feed URL handed to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3660
3661
class YoutubeWatchLaterIE(InfoExtractor):
    """Expand the ``:ytwatchlater`` keyword into the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3674
3675
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com front page URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3690
1ed5b5c9 3691
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3703
1ed5b5c9 3704
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keyword."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3713
3714
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id.

    Extraction always fails with an expected ExtractorError that explains
    the usual cause: an unquoted ``&`` in the shell cut the URL short.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The hint names yt-dlp, the program this extractor is part of
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3762
3763
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value has only 1 to 10 characters.

    Extraction always fails with an expected ExtractorError naming the
    incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)