]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[linuxacadamy] Improve regex
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c5e8d7af 28 clean_html,
26fe8ffe 29 dict_get,
c5e8d7af 30 ExtractorError,
b60419c5 31 format_field,
2d30521a 32 float_or_none,
dd27fd17 33 int_or_none,
94278f72 34 mimetype2ext,
6310acf5 35 parse_codecs,
7c80519c 36 parse_duration,
dca3ff4a 37 qualities,
3995d37d 38 remove_start,
cf7e015f 39 smuggle_url,
dbdaaa23 40 str_or_none,
c93d53f5 41 str_to_int,
556dbe7f 42 try_get,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
8bdd16b4 46 update_url_query,
21c340b8 47 url_or_none,
6e6bc8da 48 urlencode_postdata,
8bdd16b4 49 urljoin,
c5e8d7af
PH
50)
51
5f6a1245 52
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account endpoints. _login() fetches _LOGIN_URL first to collect
    # the hidden form inputs. _TWOFACTOR_URL is not referenced by the login
    # flow visible in this class.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # JSON endpoints posted to by _login(): account lookup, password
    # challenge, and TFA submission ({0} is filled with the extracted TL value)
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names; presumably the first path segments
    # that must not be mistaken for a channel/user name - TODO confirm usage
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist ID pattern: a known prefix followed by at least 10 ID
    # characters, or one of the fixed short IDs (RDMM, WL, LL, LM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 72
25f14e9f
S
def _ids_to_results(self, ids):
    """Turn an iterable of video IDs into a list of 'Youtube' url_result entries."""
    results = []
    for video_id in ids:
        # The ID doubles as the URL handed to the Youtube extractor
        results.append(self.url_result(video_id, 'Youtube', video_id=video_id))
    return results
77
def _login(self):
    """
    Attempt to log in to YouTube.
    True is returned if successful or skipped.
    False is returned if login failed.

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

    The flow is: fetch the login page for its hidden form inputs, look up
    the account to obtain a user hash, submit the password challenge,
    optionally submit a two-factor code, then fetch the CheckCookie URL.
    Every failure along the way is reported as a warning and yields False.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (not None) so the result matches the documented contract
        return False

    login_form = self._hidden_inputs(login_page)

    # Sent verbatim inside both the lookup and the challenge request
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    def req(url, f_req, note, errnote):
        """POST f_req (JSON-encoded) together with the login form fields; False on failure."""
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything in front of the first '[' before JSON parsing
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised on purpose: '%' binds tighter than the conditional
        # expression, so without the parentheses any other server message
        # would be reported without the 'Unable to login' prefix.
        warn('Unable to login: %s' % (
            'Invalid password'
            if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password challenge above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code'
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login counts as successful only if the CheckCookie response
    # mentions the account page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
262
def _download_webpage_handle(self, *args, **kwargs):
    """Forward to the parent implementation with a private copy of `query`.

    The `query` keyword is replaced by a copy, so the parent never receives
    the caller's own dict. A missing or explicit ``query=None`` is treated
    as an empty query instead of raising AttributeError on ``None.copy()``.
    """
    kwargs['query'] = (kwargs.get('query') or {}).copy()
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
268
b2e8bc1b
JMF
def _real_initialize(self):
    """Run the login flow when a downloader is attached; otherwise do nothing."""
    if self._downloader is not None:
        # The login outcome is not acted upon here: None is returned either way
        self._login()
c5e8d7af 274
# Client version string shared by the API request context and the basic API headers
_YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
# Base JSON payload for API requests; _call_api() copies it and merges the
# per-request query on top
_DEFAULT_API_DATA = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': _YT_WEB_CLIENT_VERSION,
        }
    },
}

# Client identification headers carrying the same version as the payload above
_DEFAULT_BASIC_API_HEADERS = {
    'X-YouTube-Client-Name': '1',
    'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
}

# Patterns capturing the JSON object assigned to ytInitialData /
# ytInitialPlayerResponse in a page. _YT_INITIAL_BOUNDARY_RE describes what
# may follow the terminating ';' and is appended by _extract_yt_initial_data()
# for its first, stricter attempt.
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 293
a5c56234
M
def _generate_sapisidhash_header(self):
    """Build the 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

    Returns None when the youtube.com cookies contain no SAPISID entry.
    The SHA-1 input is '<time> <cookie value> https://www.youtube.com'.
    """
    origin = 'https://www.youtube.com'
    cookie = self._get_cookies(origin).get('SAPISID')
    if cookie is None:
        return None
    timestamp = round(time.time())
    payload = ' '.join((str(timestamp), cookie.value, origin))
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH %s_%s' % (timestamp, digest)
301
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page'):
    """POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed reply.

    The request body is _DEFAULT_API_DATA with the top-level keys of
    `query` merged over it. `headers` are extended with the JSON content
    type and, when a SAPISIDHASH value can be generated, with the
    Authorization and X-Origin headers. The caller's `headers` dict is
    never modified. `video_id`, `fatal`, `note` and `errnote` are passed
    through to _download_json().
    """
    data = self._DEFAULT_API_DATA.copy()
    data.update(query)
    # Work on a private copy: updating the passed-in dict in place would leak
    # content-type/Authorization into header dicts shared by the caller
    headers = dict(headers or {})
    headers.update({'content-type': 'application/json'})
    auth = self._generate_sapisidhash_header()
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
    return self._download_json(
        'https://www.youtube.com/youtubei/v1/%s' % ep,
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=headers,
        query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 316
def _extract_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData JSON object in `webpage` and return it parsed."""
    # First choice: the data pattern followed by a known boundary; the bare
    # data pattern is the second candidate handed to _search_regex()
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 323
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN from the page's ytcfg, else from a regex over `webpage`.

    The regex fallback yields None when nothing matches.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    # An empty or missing ytcfg is skipped without consulting it
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
333
@staticmethod
def _extract_account_syncid(data):
    """Extract syncId required to download private playlists of secondary channels"""
    datasync_id = try_get(
        data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        compat_str)
    if not datasync_id:
        return None
    # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
    # and just "user_syncid||" for primary channel. We only want the channel_syncid
    parts = datasync_id.split('||')
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[0]
344
def _extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ytcfg.set(...) in `webpage`.

    When no such call is found the search falls back to '{}'; JSON parsing
    is non-fatal.
    """
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False)
350
def _extract_video(self, renderer):
    """Build a url_transparent result for YoutubeIE from a video renderer mapping.

    Fields that cannot be read from `renderer` end up as None.
    """
    video_id = renderer.get('videoId')
    info = {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
    }

    # The title is read from the first text run, else from the simple text
    info['title'] = try_get(
        renderer,
        (lambda x: x['title']['runs'][0]['text'],
         lambda x: x['title']['simpleText']), compat_str)
    info['description'] = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
        compat_str)

    length_text = try_get(
        renderer, lambda x: x['lengthText']['simpleText'], compat_str)
    info['duration'] = parse_duration(length_text)

    # Strip all whitespace, then keep only the leading digits-and-commas part
    views_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    info['view_count'] = str_to_int(leading_number)

    # The owner text is preferred over the short byline for the uploader
    info['uploader'] = try_get(
        renderer,
        (lambda x: x['ownerText']['runs'][0]['text'],
         lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
    return info
382
0c148415 383
360e1ca5 384class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 385 IE_DESC = 'YouTube.com'
bc2ca1bb 386 _INVIDIOUS_SITES = (
387 # invidious-redirect websites
388 r'(?:www\.)?redirect\.invidious\.io',
389 r'(?:(?:www|dev)\.)?invidio\.us',
390 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
391 r'(?:www\.)?invidious\.pussthecat\.org',
392 r'(?:www\.)?invidious\.048596\.xyz',
393 r'(?:www\.)?invidious\.zee\.li',
394 r'(?:www\.)?vid\.puffyan\.us',
395 r'(?:(?:www|au)\.)?ytprivate\.com',
396 r'(?:www\.)?invidious\.namazso\.eu',
397 r'(?:www\.)?invidious\.ethibox\.fr',
398 r'(?:www\.)?inv\.skyn3t\.in',
399 r'(?:www\.)?invidious\.himiko\.cloud',
400 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
401 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
402 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
403 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
404 # youtube-dl invidious instances list
405 r'(?:(?:www|no)\.)?invidiou\.sh',
406 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
407 r'(?:www\.)?invidious\.kabi\.tk',
408 r'(?:www\.)?invidious\.13ad\.de',
409 r'(?:www\.)?invidious\.mastodon\.host',
410 r'(?:www\.)?invidious\.zapashcanon\.fr',
411 r'(?:www\.)?invidious\.kavin\.rocks',
412 r'(?:www\.)?invidious\.tube',
413 r'(?:www\.)?invidiou\.site',
414 r'(?:www\.)?invidious\.site',
415 r'(?:www\.)?invidious\.xyz',
416 r'(?:www\.)?invidious\.nixnet\.xyz',
417 r'(?:www\.)?invidious\.drycat\.fr',
418 r'(?:www\.)?tube\.poal\.co',
419 r'(?:www\.)?tube\.connect\.cafe',
420 r'(?:www\.)?vid\.wxzm\.sx',
421 r'(?:www\.)?vid\.mint\.lgbt',
422 r'(?:www\.)?yewtu\.be',
423 r'(?:www\.)?yt\.elukerio\.org',
424 r'(?:www\.)?yt\.lelux\.fi',
425 r'(?:www\.)?invidious\.ggc-project\.de',
426 r'(?:www\.)?yt\.maisputain\.ovh',
427 r'(?:www\.)?invidious\.toot\.koeln',
428 r'(?:www\.)?invidious\.fdn\.fr',
429 r'(?:www\.)?watch\.nettohikari\.com',
430 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
431 r'(?:www\.)?qklhadlycap4cnod\.onion',
432 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
433 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
434 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
435 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
436 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
437 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
438 )
cb7dfeea 439 _VALID_URL = r"""(?x)^
c5e8d7af 440 (
edb53e2d 441 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 442 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
443 (?:www\.)?deturl\.com/www\.youtube\.com|
444 (?:www\.)?pwnyoutube\.com|
445 (?:www\.)?hooktube\.com|
446 (?:www\.)?yourepeat\.com|
447 tube\.majestyc\.net|
448 %(invidious)s|
449 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
450 (?:.*?\#/)? # handle anchor (#/) redirect urls
451 (?: # the various things that can precede the ID:
ac7553d0 452 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 453 |(?: # or the v= param in all its forms
f7000f3a 454 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 455 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 456 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
457 v=
458 )
f4b05232 459 ))
cbaed4bb
S
460 |(?:
461 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
462 vid\.plus| # or vid.plus/xxxx
463 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 464 %(invidious)s
cbaed4bb 465 )/
edb53e2d 466 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 467 )
c5e8d7af 468 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 469 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
470 (?!.*?\blist=
471 (?:
472 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
473 WL # WL are handled by the watch later IE
474 )
475 )
c5e8d7af 476 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 477 $""" % {
478 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
479 'invidious': '|'.join(_INVIDIOUS_SITES),
480 }
e40c758c 481 _PLAYER_INFO_RE = (
cc2db878 482 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
483 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 484 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 485 )
2c62dc26 486 _formats = {
c2d3cb4c 487 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
488 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
489 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
490 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
491 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
492 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
493 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
494 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 495 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 496 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
497 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
498 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
499 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
500 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
501 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 502 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 503 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
504 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 505
506
507 # 3D videos
c2d3cb4c 508 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
509 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
510 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
511 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 512 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
513 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
514 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 515
96fb5605 516 # Apple HTTP Live Streaming
11f12195 517 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 518 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
519 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
520 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
521 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
522 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 523 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
524 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
525
526 # DASH mp4 video
d23028a8
S
527 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
528 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
529 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
530 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
531 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 532 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
533 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
534 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
535 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
536 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
537 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
538 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 539
f6f1fc92 540 # Dash mp4 audio
d23028a8
S
541 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
542 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
543 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
544 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
545 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
546 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
547 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
548
549 # Dash webm
d23028a8
S
550 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
551 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
552 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
553 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
554 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
555 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
556 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
557 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
558 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
559 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
560 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
561 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
562 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
563 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
564 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 565 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
566 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
567 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
568 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
569 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
570 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
571 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
572
573 # Dash webm audio
d23028a8
S
574 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
575 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 576
0857baad 577 # Dash webm audio with opus inside
d23028a8
S
578 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
579 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
580 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 581
ce6b9a2d
PH
582 # RTMP (unnamed)
583 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
584
585 # av01 video only formats sometimes served with "unknown" codecs
586 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
587 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
588 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
589 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 590 }
29f7c58a 591 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 592
fd5c4aab
S
593 _GEO_BYPASS = False
594
78caa52a 595 IE_NAME = 'youtube'
2eb88d95
PH
596 _TESTS = [
597 {
2d3d2997 598 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
599 'info_dict': {
600 'id': 'BaW_jenozKc',
601 'ext': 'mp4',
3867038a 602 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
603 'uploader': 'Philipp Hagemeister',
604 'uploader_id': 'phihag',
ec85ded8 605 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
606 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
607 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 608 'upload_date': '20121002',
3867038a 609 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 610 'categories': ['Science & Technology'],
3867038a 611 'tags': ['youtube-dl'],
556dbe7f 612 'duration': 10,
dbdaaa23 613 'view_count': int,
3e7c1224
PH
614 'like_count': int,
615 'dislike_count': int,
7c80519c 616 'start_time': 1,
297a564b 617 'end_time': 9,
2eb88d95 618 }
0e853ca4 619 },
fccd3771 620 {
4bc3a23e
PH
621 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
622 'note': 'Embed-only video (#1746)',
623 'info_dict': {
624 'id': 'yZIXLfi8CZQ',
625 'ext': 'mp4',
626 'upload_date': '20120608',
627 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
628 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
629 'uploader': 'SET India',
94bfcd23 630 'uploader_id': 'setindia',
ec85ded8 631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 632 'age_limit': 18,
545cc85d 633 },
634 'skip': 'Private video',
fccd3771 635 },
11b56058 636 {
8bdd16b4 637 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
638 'note': 'Use the first video ID in the URL',
639 'info_dict': {
640 'id': 'BaW_jenozKc',
641 'ext': 'mp4',
3867038a 642 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
643 'uploader': 'Philipp Hagemeister',
644 'uploader_id': 'phihag',
ec85ded8 645 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 646 'upload_date': '20121002',
3867038a 647 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 648 'categories': ['Science & Technology'],
3867038a 649 'tags': ['youtube-dl'],
556dbe7f 650 'duration': 10,
dbdaaa23 651 'view_count': int,
11b56058
PM
652 'like_count': int,
653 'dislike_count': int,
34a7de29
S
654 },
655 'params': {
656 'skip_download': True,
657 },
11b56058 658 },
dd27fd17 659 {
2d3d2997 660 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
661 'note': '256k DASH audio (format 141) via DASH manifest',
662 'info_dict': {
663 'id': 'a9LDPn-MO4I',
664 'ext': 'm4a',
665 'upload_date': '20121002',
666 'uploader_id': '8KVIDEO',
ec85ded8 667 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
668 'description': '',
669 'uploader': '8KVIDEO',
670 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 671 },
4bc3a23e
PH
672 'params': {
673 'youtube_include_dash_manifest': True,
674 'format': '141',
4919603f 675 },
de3c7fe0 676 'skip': 'format 141 not served anymore',
dd27fd17 677 },
8bdd16b4 678 # DASH manifest with encrypted signature
679 {
680 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
681 'info_dict': {
682 'id': 'IB3lcPjvWLA',
683 'ext': 'm4a',
684 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
685 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
686 'duration': 244,
687 'uploader': 'AfrojackVEVO',
688 'uploader_id': 'AfrojackVEVO',
689 'upload_date': '20131011',
cc2db878 690 'abr': 129.495,
8bdd16b4 691 },
692 'params': {
693 'youtube_include_dash_manifest': True,
694 'format': '141/bestaudio[ext=m4a]',
695 },
696 },
aa79ac0c
PH
697 # Controversy video
698 {
699 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
700 'info_dict': {
701 'id': 'T4XJQO3qol8',
702 'ext': 'mp4',
556dbe7f 703 'duration': 219,
aa79ac0c 704 'upload_date': '20100909',
4fe54c12 705 'uploader': 'Amazing Atheist',
aa79ac0c 706 'uploader_id': 'TheAmazingAtheist',
ec85ded8 707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 708 'title': 'Burning Everyone\'s Koran',
545cc85d 709 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 710 }
c522adb1 711 },
dd2d55f1 712 # Normal age-gate video (embed allowed)
c522adb1 713 {
2d3d2997 714 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
715 'info_dict': {
716 'id': 'HtVdAasjOgU',
717 'ext': 'mp4',
718 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 719 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 720 'duration': 142,
c522adb1
JMF
721 'uploader': 'The Witcher',
722 'uploader_id': 'WitcherGame',
ec85ded8 723 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 724 'upload_date': '20140605',
34952f09 725 'age_limit': 18,
c522adb1
JMF
726 },
727 },
8bdd16b4 728 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
729 # YouTube Red ad is not captured for creator
730 {
731 'url': '__2ABJjxzNo',
732 'info_dict': {
733 'id': '__2ABJjxzNo',
734 'ext': 'mp4',
735 'duration': 266,
736 'upload_date': '20100430',
737 'uploader_id': 'deadmau5',
738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 739 'creator': 'deadmau5',
740 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 741 'uploader': 'deadmau5',
742 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 743 'alt_title': 'Some Chords',
8bdd16b4 744 },
745 'expected_warnings': [
746 'DASH manifest missing',
747 ]
748 },
067aa17e 749 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
750 {
751 'url': 'lqQg6PlCWgI',
752 'info_dict': {
753 'id': 'lqQg6PlCWgI',
754 'ext': 'mp4',
556dbe7f 755 'duration': 6085,
90227264 756 'upload_date': '20150827',
cbe2bd91 757 'uploader_id': 'olympic',
ec85ded8 758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 759 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 760 'uploader': 'Olympic',
cbe2bd91
PH
761 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
762 },
763 'params': {
764 'skip_download': 'requires avconv',
e52a40ab 765 }
cbe2bd91 766 },
6271f1ca
PH
767 # Non-square pixels
768 {
769 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
770 'info_dict': {
771 'id': '_b-2C3KPAM0',
772 'ext': 'mp4',
773 'stretched_ratio': 16 / 9.,
556dbe7f 774 'duration': 85,
6271f1ca
PH
775 'upload_date': '20110310',
776 'uploader_id': 'AllenMeow',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 778 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 779 'uploader': '孫ᄋᄅ',
6271f1ca
PH
780 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
781 },
06b491eb
S
782 },
783 # url_encoded_fmt_stream_map is empty string
784 {
785 'url': 'qEJwOuvDf7I',
786 'info_dict': {
787 'id': 'qEJwOuvDf7I',
f57b7835 788 'ext': 'webm',
06b491eb
S
789 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
790 'description': '',
791 'upload_date': '20150404',
792 'uploader_id': 'spbelect',
793 'uploader': 'Наблюдатели Петербурга',
794 },
795 'params': {
796 'skip_download': 'requires avconv',
e323cf3f
S
797 },
798 'skip': 'This live event has ended.',
06b491eb 799 },
067aa17e 800 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
801 {
802 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
803 'info_dict': {
804 'id': 'FIl7x6_3R5Y',
eb6793ba 805 'ext': 'webm',
da77d856
S
806 'title': 'md5:7b81415841e02ecd4313668cde88737a',
807 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 808 'duration': 220,
da77d856
S
809 'upload_date': '20150625',
810 'uploader_id': 'dorappi2000',
ec85ded8 811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 812 'uploader': 'dorappi2000',
eb6793ba 813 'formats': 'mincount:31',
da77d856 814 },
eb6793ba 815 'skip': 'not actual anymore',
2ee8f5d8 816 },
8a1a26ce
YCH
817 # DASH manifest with segment_list
818 {
819 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
820 'md5': '8ce563a1d667b599d21064e982ab9e31',
821 'info_dict': {
822 'id': 'CsmdDsKjzN8',
823 'ext': 'mp4',
17ee98e1 824 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
825 'uploader': 'Airtek',
826 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
827 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
828 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
829 },
830 'params': {
831 'youtube_include_dash_manifest': True,
832 'format': '135', # bestvideo
be49068d
S
833 },
834 'skip': 'This live event has ended.',
2ee8f5d8 835 },
cf7e015f
S
836 {
837 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 838 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 839 'info_dict': {
545cc85d 840 'id': 'jvGDaLqkpTg',
841 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
842 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
843 },
844 'playlist': [{
845 'info_dict': {
545cc85d 846 'id': 'jvGDaLqkpTg',
cf7e015f 847 'ext': 'mp4',
545cc85d 848 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
849 'description': 'md5:e03b909557865076822aa169218d6a5d',
850 'duration': 10643,
851 'upload_date': '20161111',
852 'uploader': 'Team PGP',
853 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
854 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
855 },
856 }, {
857 'info_dict': {
545cc85d 858 'id': '3AKt1R1aDnw',
cf7e015f 859 'ext': 'mp4',
545cc85d 860 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
861 'description': 'md5:e03b909557865076822aa169218d6a5d',
862 'duration': 10991,
863 'upload_date': '20161111',
864 'uploader': 'Team PGP',
865 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
866 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
867 },
868 }, {
869 'info_dict': {
545cc85d 870 'id': 'RtAMM00gpVc',
cf7e015f 871 'ext': 'mp4',
545cc85d 872 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
873 'description': 'md5:e03b909557865076822aa169218d6a5d',
874 'duration': 10995,
875 'upload_date': '20161111',
876 'uploader': 'Team PGP',
877 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
878 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
879 },
880 }, {
881 'info_dict': {
545cc85d 882 'id': '6N2fdlP3C5U',
cf7e015f 883 'ext': 'mp4',
545cc85d 884 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
885 'description': 'md5:e03b909557865076822aa169218d6a5d',
886 'duration': 10990,
887 'upload_date': '20161111',
888 'uploader': 'Team PGP',
889 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
891 },
892 }],
893 'params': {
894 'skip_download': True,
895 },
cbaed4bb 896 },
f9f49d87 897 {
067aa17e 898 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
899 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
900 'info_dict': {
901 'id': 'gVfLd0zydlo',
902 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
903 },
904 'playlist_count': 2,
be49068d 905 'skip': 'Not multifeed anymore',
f9f49d87 906 },
cbaed4bb 907 {
2d3d2997 908 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 909 'only_matching': True,
0e49d9a6 910 },
6d4fc66b 911 {
2d3d2997 912 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
913 'only_matching': True,
914 },
0e49d9a6 915 {
067aa17e 916 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 917 # Also tests cut-off URL expansion in video description (see
067aa17e
S
918 # https://github.com/ytdl-org/youtube-dl/issues/1892,
919 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
920 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
921 'info_dict': {
922 'id': 'lsguqyKfVQg',
923 'ext': 'mp4',
924 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 925 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 926 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 927 'duration': 133,
0e49d9a6
LL
928 'upload_date': '20151119',
929 'uploader_id': 'IronSoulElf',
ec85ded8 930 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 931 'uploader': 'IronSoulElf',
eb6793ba
S
932 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
933 'track': 'Dark Walk - Position Music',
934 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 935 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
936 },
937 'params': {
938 'skip_download': True,
939 },
940 },
61f92af1 941 {
067aa17e 942 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
943 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
944 'only_matching': True,
945 },
313dfc45
LL
946 {
947 # Video with yt:stretch=17:0
948 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
949 'info_dict': {
950 'id': 'Q39EVAstoRM',
951 'ext': 'mp4',
952 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
953 'description': 'md5:ee18a25c350637c8faff806845bddee9',
954 'upload_date': '20151107',
955 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
956 'uploader': 'CH GAMER DROID',
957 },
958 'params': {
959 'skip_download': True,
960 },
be49068d 961 'skip': 'This video does not exist.',
313dfc45 962 },
7caf9830
S
963 {
964 # Video licensed under Creative Commons
965 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
966 'info_dict': {
967 'id': 'M4gD1WSo5mA',
968 'ext': 'mp4',
969 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
970 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 971 'duration': 721,
7caf9830
S
972 'upload_date': '20150127',
973 'uploader_id': 'BerkmanCenter',
ec85ded8 974 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 975 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
976 'license': 'Creative Commons Attribution license (reuse allowed)',
977 },
978 'params': {
979 'skip_download': True,
980 },
981 },
fd050249
S
982 {
983 # Channel-like uploader_url
984 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
985 'info_dict': {
986 'id': 'eQcmzGIKrzg',
987 'ext': 'mp4',
988 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 989 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 990 'duration': 4060,
fd050249 991 'upload_date': '20151119',
eb6793ba 992 'uploader': 'Bernie Sanders',
fd050249 993 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
995 'license': 'Creative Commons Attribution license (reuse allowed)',
996 },
997 'params': {
998 'skip_download': True,
999 },
1000 },
040ac686
S
1001 {
1002 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1003 'only_matching': True,
7f29cf54
S
1004 },
1005 {
067aa17e 1006 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1007 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1008 'only_matching': True,
6496ccb4
S
1009 },
1010 {
1011 # Rental video preview
1012 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1013 'info_dict': {
1014 'id': 'uGpuVWrhIzE',
1015 'ext': 'mp4',
1016 'title': 'Piku - Trailer',
1017 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1018 'upload_date': '20150811',
1019 'uploader': 'FlixMatrix',
1020 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1021 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1022 'license': 'Standard YouTube License',
1023 },
1024 'params': {
1025 'skip_download': True,
1026 },
eb6793ba 1027 'skip': 'This video is not available.',
022a5d66 1028 },
12afdc2a
S
1029 {
1030 # YouTube Red video with episode data
1031 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1032 'info_dict': {
1033 'id': 'iqKdEhx-dD4',
1034 'ext': 'mp4',
1035 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1036 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1037 'duration': 2085,
12afdc2a
S
1038 'upload_date': '20170118',
1039 'uploader': 'Vsauce',
1040 'uploader_id': 'Vsauce',
1041 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1042 'series': 'Mind Field',
1043 'season_number': 1,
1044 'episode_number': 1,
1045 },
1046 'params': {
1047 'skip_download': True,
1048 },
1049 'expected_warnings': [
1050 'Skipping DASH manifest',
1051 ],
1052 },
c7121fa7
S
1053 {
1054 # The following content has been identified by the YouTube community
1055 # as inappropriate or offensive to some audiences.
1056 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1057 'info_dict': {
1058 'id': '6SJNVb0GnPI',
1059 'ext': 'mp4',
1060 'title': 'Race Differences in Intelligence',
1061 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1062 'duration': 965,
1063 'upload_date': '20140124',
1064 'uploader': 'New Century Foundation',
1065 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1066 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1067 },
1068 'params': {
1069 'skip_download': True,
1070 },
545cc85d 1071 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1072 },
022a5d66
S
1073 {
1074 # itag 212
1075 'url': '1t24XAntNCY',
1076 'only_matching': True,
fd5c4aab
S
1077 },
1078 {
1079 # geo restricted to JP
1080 'url': 'sJL6WA-aGkQ',
1081 'only_matching': True,
1082 },
cd5a74a2
S
1083 {
1084 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1085 'only_matching': True,
1086 },
bc2ca1bb 1087 {
1088 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1089 'only_matching': True,
1090 },
1091 {
1092 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1093 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1094 'only_matching': True,
1095 },
825cd268
RA
1096 {
1097 # DRM protected
1098 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1099 'only_matching': True,
4fe54c12
S
1100 },
1101 {
1102 # Video with unsupported adaptive stream type formats
1103 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1104 'info_dict': {
1105 'id': 'Z4Vy8R84T1U',
1106 'ext': 'mp4',
1107 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1108 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1109 'duration': 433,
1110 'upload_date': '20130923',
1111 'uploader': 'Amelia Putri Harwita',
1112 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1114 'formats': 'maxcount:10',
1115 },
1116 'params': {
1117 'skip_download': True,
1118 'youtube_include_dash_manifest': False,
1119 },
5429d6a9 1120 'skip': 'not actual anymore',
5caabd3c 1121 },
1122 {
822b9d9c 1123 # Youtube Music Auto-generated description
5caabd3c 1124 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1125 'info_dict': {
1126 'id': 'MgNrAu2pzNs',
1127 'ext': 'mp4',
1128 'title': 'Voyeur Girl',
1129 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1130 'upload_date': '20190312',
5429d6a9
S
1131 'uploader': 'Stephen - Topic',
1132 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1133 'artist': 'Stephen',
1134 'track': 'Voyeur Girl',
1135 'album': 'it\'s too much love to know my dear',
1136 'release_date': '20190313',
1137 'release_year': 2019,
1138 },
1139 'params': {
1140 'skip_download': True,
1141 },
1142 },
66b48727
RA
1143 {
1144 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1145 'only_matching': True,
1146 },
011e75e6
S
1147 {
1148 # invalid -> valid video id redirection
1149 'url': 'DJztXj2GPfl',
1150 'info_dict': {
1151 'id': 'DJztXj2GPfk',
1152 'ext': 'mp4',
1153 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1154 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1155 'upload_date': '20090125',
1156 'uploader': 'Prochorowka',
1157 'uploader_id': 'Prochorowka',
1158 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1159 'artist': 'Panjabi MC',
1160 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1161 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
545cc85d 1166 'skip': 'Video unavailable',
ea74e00b
DP
1167 },
1168 {
1169 # empty description results in an empty string
1170 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1171 'info_dict': {
1172 'id': 'x41yOUIvK2k',
1173 'ext': 'mp4',
1174 'title': 'IMG 3456',
1175 'description': '',
1176 'upload_date': '20170613',
1177 'uploader_id': 'ElevageOrVert',
1178 'uploader': 'ElevageOrVert',
1179 },
1180 'params': {
1181 'skip_download': True,
1182 },
1183 },
a0566bbf 1184 {
29f7c58a 1185 # with '};' inside yt initial data (see [1])
1186 # see [2] for an example with '};' inside ytInitialPlayerResponse
1187 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1188 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1189 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1190 'info_dict': {
1191 'id': 'CHqg6qOn4no',
1192 'ext': 'mp4',
1193 'title': 'Part 77 Sort a list of simple types in c#',
1194 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1195 'upload_date': '20130831',
1196 'uploader_id': 'kudvenkat',
1197 'uploader': 'kudvenkat',
1198 },
1199 'params': {
1200 'skip_download': True,
1201 },
1202 },
29f7c58a 1203 {
1204 # another example of '};' in ytInitialData
1205 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1206 'only_matching': True,
1207 },
1208 {
1209 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1210 'only_matching': True,
1211 },
545cc85d 1212 {
cc2db878 1213 # https://github.com/ytdl-org/youtube-dl/pull/28094
1214 'url': 'OtqTfy26tG0',
1215 'info_dict': {
1216 'id': 'OtqTfy26tG0',
1217 'ext': 'mp4',
1218 'title': 'Burn Out',
1219 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1220 'upload_date': '20141120',
1221 'uploader': 'The Cinematic Orchestra - Topic',
1222 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1223 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1224 'artist': 'The Cinematic Orchestra',
1225 'track': 'Burn Out',
1226 'album': 'Every Day',
1227 'release_data': None,
1228 'release_year': None,
1229 },
1230 'params': {
1231 'skip_download': True,
1232 },
545cc85d 1233 },
bc2ca1bb 1234 {
1235 # controversial video, only works with bpctr when authenticated with cookies
1236 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1237 'only_matching': True,
1238 },
2eb88d95
PH
1239 ]
1240
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with empty per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, populated by _decrypt_signature.
    self._player_cache = {}
    # Downloaded player code, populated by _extract_signature_function.
    self._code_cache = {}
e0df6211 1245
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of example_sig."""
    part_lengths = [
        compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1249
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first _PLAYER_INFO_RE pattern matching player_url.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1259
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like example_sig.

    A cached index list from the downloader's 'youtube-sigfuncs' cache is
    used when present. Otherwise the player code is downloaded (once per
    player id), parsed with _parse_sig_js, and the resulting index list is
    stored in the cache before the parsed function is returned.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cached_indices = cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        # Rebuild the signature by picking characters in the cached order.
        return lambda s: ''.join(s[i] for i in cached_indices)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose characters encode their own positions, so the
    # output reveals which input index ends up at each output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_list = [ord(c) for c in sig_func(probe)]

    cache.store('youtube-sigfuncs', func_id, index_list)
    return sig_func
1286
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    ``func`` is applied to a probe string whose characters encode their own
    indices; the resulting index sequence is rendered as a sum of ``s[...]``
    index/slice expressions and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions that together reproduce the index
        # sequence ``idxs``; runs of consecutive indices (step +1 or -1)
        # are collapsed into a single slice.
        def _genslice(start, end, step):
            # Format the slice covering start..end (inclusive) with the
            # given step, omitting default components.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Run continues
                    continue
                # Run ended at prev: emit it and start afresh
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is pending for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1325
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a callable.

    The function name is found by trying a list of regexes in order; the
    function is then extracted with JSInterpreter. The returned callable
    takes the signature string and passes it as the single JS argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def decrypt(s):
        # The extracted JS function expects its arguments as a list.
        return initial_function([s])

    return decrypt
1349
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    Raises ExtractorError when player_url is None, or (wrapping the
    traceback) when anything fails while obtaining or applying the
    signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and site-relative player URLs.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        # Functions are cached per (player URL, signature shape).
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1376
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in player_response.

    Does nothing when player_response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. The request is
    non-fatal: a failure is reported as 'Unable to mark watched'.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick 16 characters uniformly; the previous randint(0, 256) & 63 was
    # inclusive of 256 and therefore slightly favoured the first character.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1401
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in webpage as a list of strings.

    Three sources are scanned, in this order: embedded player URLs
    (iframe/embed/object/SWFObject style), lazyYT ``data-youtube-id``
    attributes, and the ``data-video_id`` attribute of the WordPress
    "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to be ``embedSWF\(?:\s*``, which
    # required a literal ':' and so could not match ``embedSWF("...")``.
    # The pattern below accepts everything the old one did plus
    # ``embedSWF(``.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(:?|:)\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one.
    entries.extend(m[-1] for m in matches)

    return entries
1433
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1438
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1446
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in data.

    Returns None when no chapter list is present. Otherwise returns a list
    of dicts with 'start_time', 'end_time' and 'title'. A chapter ends
    where the next one starts; the last chapter ends at duration. Chapters
    whose start or end time cannot be determined are left out.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset converted from milliseconds to seconds.
        start_ms = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_ms, scale=1000)

    total = len(chapters_list)
    chapters = []
    for idx, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if idx + 1 < total:
            end_time = chapter_time(chapters_list[idx + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1486
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for a JSON blob and return it parsed.

    regex followed by _YT_INITIAL_BOUNDARY_RE is tried first, then regex on
    its own. When neither matches, '{}' is parsed instead. Parsing is
    non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1491
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dict entries in runs.

    Entries that are not dicts, or whose 'text' is missing or empty, are
    ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1505
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'parent' in the
    result is the given parent, or 'root' when none is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def joined_runs(getter):
        # Join the text runs located by getter; None if there are none.
        return self._join_text_entries(
            try_get(comment_renderer, getter) or [])

    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'],
         lambda x: x['likeCount']),
        compat_str)

    return {
        'id': comment_id,
        'text': joined_runs(lambda x: x['contentText']['runs']) or '',
        # TODO: This should be parsed to timestamp
        'time_text': joined_runs(lambda x: x['publishedTimeText']['runs']),
        'like_count': str_to_int(vote_text) or 0,
        'is_favorited': try_get(
            comment_renderer, lambda x: x['isLiked'], bool),
        'author': try_get(
            comment_renderer, lambda x: x['authorText']['simpleText'], compat_str),
        'author_id': try_get(
            comment_renderer,
            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str),
        'author_thumbnail': try_get(
            comment_renderer,
            lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
1539
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts as produced by ``_extract_comment``; replies are
    fetched recursively and yielded right after their parent comment.
    For the top-level call (``parent is None``) the estimated total number
    of comments is additionally yielded once as a plain ``int`` when it
    could be parsed from the comments header.

    *session_token_list* is a one-element list; its single item is replaced
    in place whenever a response carries a fresh ``xsrf_token``, so that
    recursive calls share the most recent token.
    *comment_counts* is a shared three-element list:
    [comments so far, estimated total, current comment thread number].

    Raises ExtractorError when the service asks for a reload, returns an
    external error message, or keeps failing with an HTTP error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # The expected type must be a separate argument: wrapped in a
            # tuple together with the getter, `dict` would be *called* as a
            # second getter and return a copy of the source dict.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    known_continuation_renderers = {
        'itemSectionContinuation': extract_thread,
        'commentRepliesContinuation': extract_thread
    }

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]
    headers = self._DEFAULT_BASIC_API_HEADERS.copy()

    # TODO: Generalize the download code with TabIE
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = parent is None
    retries = self._downloader.params.get('extractor_retries', 3)

    for page_num in itertools.count(0):
        if not continuation:
            break
        count = -1
        last_error = None
        response = {}

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = "Downloading initial comment continuation page"
                    else:
                        note_prefix = " Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = "%sDownloading comment%s page %d %s" % (
                        " " if parent else "",
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning("Assumed end of comments (received HTTP Error 413)")
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + " (this API is probably deprecated)"
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response']), dict) or {}

                if response.get('continuationContents'):
                    break

                # The payload may also be a list (see the second getter
                # above); only a dict can carry the top-level error keys.
                browse_info = browse if isinstance(browse, dict) else {}

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse_info.get('reload'):
                    raise ExtractorError("Invalid or missing params in continuation request", expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse_info.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = str_to_int(try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str))

                # Only report/yield the estimate when it is a usable number;
                # a None here would break the '%d' progress formatting.
                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen("Downloading ~%d comments" % expected_comment_count)
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in this page: `continuation`
            # was not advanced, so requesting it again would loop forever.
            break
1725
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks *contents* looking for the known comment section renderers and
    drains ``_comment_entries`` for each of them.  Integers coming out of
    that generator are taken as the estimated total; everything else is
    collected as a comment.  Returns a dict with the ``comments`` list
    and its length as ``comment_count``.
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    expected_total = 0
    for section in contents:
        for renderer_name, renderer in section.items():
            if renderer_name in supported_renderers:
                entries = self._comment_entries(
                    renderer,
                    identity_token=self._extract_identity_token(webpage, item_id=video_id),
                    account_syncid=self._extract_account_syncid(ytcfg),
                    session_token_list=[xsrf_token])

                for item in entries:
                    if isinstance(item, int):
                        expected_total = item
                    else:
                        collected.append(item)
                # Only the first supported renderer of a section is used
                break
    self.to_screen("Downloaded %d/%d comments" % (len(collected), expected_total))
    result = {'comments': collected}
    result['comment_count'] = len(collected)
    return result
1755
def _real_extract(self, url):
    """Extract the info dict for a single watch URL.

    Downloads the watch page (non-fatally), obtains the player response
    from the page or from the ``player`` API, and builds the formats list
    from the streaming data and the HLS/DASH manifests.  Thumbnails,
    subtitles, chapters and further metadata are added from the player
    response, the page's meta tags and the "initial data"/``next`` API.

    Returns a playlist result for multifeed videos (unless a single feed
    was forced or ``noplaylist`` is set), a URL result pointing at the
    trailer when the player response only offers one, and otherwise the
    info dict of the video.

    Raises ExtractorError when no formats were found and the playability
    status gives a reason, or when the video is DRM protected.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&has_verified=1&bpctr=9999999999',
        video_id, fatal=False)

    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') == 'Sign in to confirm your age':
        pr = self._parse_json(try_get(compat_parse_qs(
            self._download_webpage(
                base_url + 'get_video_info', video_id,
                'Refetching age-gated info webpage',
                'unable to download video info webpage', query={
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                }, fatal=False)),
            lambda x: x['player_response'][0],
            compat_str) or '{}', video_id)
        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text objects carry either a 'simpleText' or a list of 'runs'
        if not x:
            return
        return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats = []
    itags = []
    itag_qualities = {}
    player_url = None
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        quality = fmt.get('quality')
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: it is wrapped, together with an encrypted
            # signature, in the 'signatureCipher' query string
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                if not webpage:
                    continue
                player_url = self._search_regex(
                    r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                    webpage, 'player URL', fatal=False)
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
        }
        mimetype = fmt.get('mimeType')
        if mimetype:
            mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
            if mobj:
                dct['ext'] = mimetype2ext(mobj.group(1))
                dct.update(parse_codecs(mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            itag = self._search_regex(
                r'/itag/(\d+)', f['url'], 'itag', default=None)
            if itag:
                f['format_id'] = itag
            formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest'):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    # Not actually useful since the sorting is already done with "quality,res,fps,codec"
                    # but kept to maintain feature parity (and code similarity) with youtube-dl
                    # Remove if this causes any issues with sorting in future
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # `reason` may be None when only a subreason was given
            reason = (reason + '\n' if reason else '') + subreason
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        # Malformed 'yt:stretch=' keywords are ignored instead of crashing
        mobj = re.match(r'yt:stretch=(\d+):(\d+)$', keyword)
        if not mobj:
            continue
        # NB: float is intentional for forcing float division
        w, h = (float(v) for v in mobj.groups())
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # Neither container had thumbnails: fall back to the meta tags
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    owner_profile_url = microformat.get('ownerProfileUrl')

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # video_details may be empty, so do not index it directly
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent')
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, query):
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                })
            container[lang_code] = lang_subs

        for caption_track in (pctr.get('captionTracks') or []):
            caption_base_url = caption_track.get('baseUrl')
            if not caption_base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_base_url, lang_code, {})
                continue
            # 'asr' track: offer it in every translation language
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, caption_base_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    if not is_live:
        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        except (KeyError, IndexError, TypeError):
            pass
        else:
            # 'subtitles' is only set above when caption data exists
            info.setdefault('subtitles', {})['live_chat'] = [{
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            }]

    # Defined up front: the comment post-extractor below needs it even
    # when no initial data could be obtained
    results_contents = []
    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            def chapter_time(mmlir):
                return parse_duration(
                    get_text((mmlir or {}).get('timeDescription')))

            for engagment_pannel in (initial_data.get('engagementPanels') or []):
                marker_contents = try_get(
                    engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not marker_contents:
                    continue

                chapters = []
                for next_num, content in enumerate(marker_contents, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    # A chapter ends where the next one starts; the last
                    # one ends with the video
                    end_time = chapter_time(try_get(
                        marker_contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
                        if next_num < len(marker_contents) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        results_contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in results_contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'],
                    compat_str)
                if sbr_tooltip:
                    # Expected form: '<likes> / <dislikes>'
                    counts = sbr_tooltip.split(' / ')
                    if len(counts) == 2:
                        info.update({
                            'like_count': str_to_int(counts[0]),
                            'dislike_count': str_to_int(counts[1]),
                        })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                # get_text() needs the text object (a dict), not a string
                info['channel'] = get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    mrr_contents_text = get_text(try_get(
                        mrr, lambda x: x['contents'][0], dict))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    # get xsrf for annotations or comments
    get_annotations = self._downloader.params.get('writeannotations', False)
    get_comments = self._downloader.params.get('getcomments', False)
    xsrf_token = None
    ytcfg = None
    # The watch page download is non-fatal, so there may be no page to search
    if (get_annotations or get_comments) and webpage:
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Deferred: the comments are only fetched when this is called
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, results_contents, webpage, xsrf_token)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 2324
5f6a1245 2325
8bdd16b4 2326class YoutubeTabIE(YoutubeBaseInfoExtractor):
2327 IE_DESC = 'YouTube.com tab'
70d5c17b 2328 _VALID_URL = r'''(?x)
2329 https?://
2330 (?:\w+\.)?
2331 (?:
2332 youtube(?:kids)?\.com|
2333 invidio\.us
2334 )/
2335 (?:
2336 (?:channel|c|user)/|
2337 (?P<not_channel>
9ba5705a 2338 feed/|hashtag/|
70d5c17b 2339 (?:playlist|watch)\?.*?\blist=
2340 )|
29f7c58a 2341 (?!(?:%s)\b) # Direct URLs
70d5c17b 2342 )
2343 (?P<id>[^/?\#&]+)
2344 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2345 IE_NAME = 'youtube:tab'
2346
81127aa5 2347 _TESTS = [{
8bdd16b4 2348 # playlists, multipage
2349 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2350 'playlist_mincount': 94,
2351 'info_dict': {
2352 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2353 'title': 'Игорь Клейнер - Playlists',
2354 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2355 'uploader': 'Игорь Клейнер',
2356 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2357 },
2358 }, {
2359 # playlists, multipage, different order
2360 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2361 'playlist_mincount': 94,
2362 'info_dict': {
2363 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2364 'title': 'Игорь Клейнер - Playlists',
2365 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2366 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2367 'uploader': 'Игорь Клейнер',
8bdd16b4 2368 },
2369 }, {
2370 # playlists, singlepage
2371 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2372 'playlist_mincount': 4,
2373 'info_dict': {
2374 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2375 'title': 'ThirstForScience - Playlists',
2376 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2377 'uploader': 'ThirstForScience',
2378 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2379 }
2380 }, {
2381 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2382 'only_matching': True,
2383 }, {
2384 # basic, single video playlist
0e30a7b9 2385 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2386 'info_dict': {
0e30a7b9 2387 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2388 'uploader': 'Sergey M.',
2389 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2390 'title': 'youtube-dl public playlist',
81127aa5 2391 },
0e30a7b9 2392 'playlist_count': 1,
9291475f 2393 }, {
8bdd16b4 2394 # empty playlist
0e30a7b9 2395 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2396 'info_dict': {
0e30a7b9 2397 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2398 'uploader': 'Sergey M.',
2399 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2400 'title': 'youtube-dl empty playlist',
9291475f
PH
2401 },
2402 'playlist_count': 0,
2403 }, {
8bdd16b4 2404 # Home tab
2405 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2406 'info_dict': {
8bdd16b4 2407 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2408 'title': 'lex will - Home',
2409 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2410 'uploader': 'lex will',
2411 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2412 },
8bdd16b4 2413 'playlist_mincount': 2,
9291475f 2414 }, {
8bdd16b4 2415 # Videos tab
2416 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2417 'info_dict': {
8bdd16b4 2418 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2419 'title': 'lex will - Videos',
2420 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2421 'uploader': 'lex will',
2422 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2423 },
8bdd16b4 2424 'playlist_mincount': 975,
9291475f 2425 }, {
8bdd16b4 2426 # Videos tab, sorted by popular
2427 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2428 'info_dict': {
8bdd16b4 2429 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2430 'title': 'lex will - Videos',
2431 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2432 'uploader': 'lex will',
2433 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2434 },
8bdd16b4 2435 'playlist_mincount': 199,
9291475f 2436 }, {
8bdd16b4 2437 # Playlists tab
2438 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2439 'info_dict': {
8bdd16b4 2440 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2441 'title': 'lex will - Playlists',
2442 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2443 'uploader': 'lex will',
2444 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2445 },
8bdd16b4 2446 'playlist_mincount': 17,
ac7553d0 2447 }, {
8bdd16b4 2448 # Community tab
2449 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2450 'info_dict': {
8bdd16b4 2451 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2452 'title': 'lex will - Community',
2453 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2454 'uploader': 'lex will',
2455 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2456 },
2457 'playlist_mincount': 18,
87dadd45 2458 }, {
8bdd16b4 2459 # Channels tab
2460 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2461 'info_dict': {
8bdd16b4 2462 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2463 'title': 'lex will - Channels',
2464 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2465 'uploader': 'lex will',
2466 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2467 },
deaec5af 2468 'playlist_mincount': 12,
6b08cdf6 2469 }, {
a0566bbf 2470 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2471 'only_matching': True,
2472 }, {
a0566bbf 2473 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2474 'only_matching': True,
2475 }, {
a0566bbf 2476 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2477 'only_matching': True,
2478 }, {
2479 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2480 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2481 'info_dict': {
2482 'title': '29C3: Not my department',
2483 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2484 'uploader': 'Christiaan008',
2485 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2486 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2487 },
2488 'playlist_count': 96,
2489 }, {
2490 'note': 'Large playlist',
2491 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2492 'info_dict': {
8bdd16b4 2493 'title': 'Uploads from Cauchemar',
2494 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2495 'uploader': 'Cauchemar',
2496 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2497 },
8bdd16b4 2498 'playlist_mincount': 1123,
2499 }, {
2500 # even larger playlist, 8832 videos
2501 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2502 'only_matching': True,
4b7df0d3
JMF
2503 }, {
2504 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2505 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2506 'info_dict': {
acf757f4
PH
2507 'title': 'Uploads from Interstellar Movie',
2508 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2509 'uploader': 'Interstellar Movie',
8bdd16b4 2510 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2511 },
481cc733 2512 'playlist_mincount': 21,
8bdd16b4 2513 }, {
2514 # https://github.com/ytdl-org/youtube-dl/issues/21844
2515 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2516 'info_dict': {
2517 'title': 'Data Analysis with Dr Mike Pound',
2518 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2519 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2520 'uploader': 'Computerphile',
deaec5af 2521 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2522 },
2523 'playlist_mincount': 11,
2524 }, {
a0566bbf 2525 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2526 'only_matching': True,
dacb3a86
S
2527 }, {
2528 # Playlist URL that does not actually serve a playlist
2529 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2530 'info_dict': {
2531 'id': 'FqZTN594JQw',
2532 'ext': 'webm',
2533 'title': "Smiley's People 01 detective, Adventure Series, Action",
2534 'uploader': 'STREEM',
2535 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2536 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2537 'upload_date': '20150526',
2538 'license': 'Standard YouTube License',
2539 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2540 'categories': ['People & Blogs'],
2541 'tags': list,
dbdaaa23 2542 'view_count': int,
dacb3a86
S
2543 'like_count': int,
2544 'dislike_count': int,
2545 },
2546 'params': {
2547 'skip_download': True,
2548 },
13a75688 2549 'skip': 'This video is not available.',
dacb3a86 2550 'add_ie': [YoutubeIE.ie_key()],
481cc733 2551 }, {
8bdd16b4 2552 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2553 'only_matching': True,
66b48727 2554 }, {
8bdd16b4 2555 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2556 'only_matching': True,
a0566bbf 2557 }, {
2558 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2559 'info_dict': {
2560 'id': '9Auq9mYxFEE',
2561 'ext': 'mp4',
deaec5af 2562 'title': compat_str,
a0566bbf 2563 'uploader': 'Sky News',
2564 'uploader_id': 'skynews',
2565 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2566 'upload_date': '20191102',
deaec5af 2567 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2568 'categories': ['News & Politics'],
2569 'tags': list,
2570 'like_count': int,
2571 'dislike_count': int,
2572 },
2573 'params': {
2574 'skip_download': True,
2575 },
2576 }, {
2577 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2578 'info_dict': {
2579 'id': 'a48o2S1cPoo',
2580 'ext': 'mp4',
2581 'title': 'The Young Turks - Live Main Show',
2582 'uploader': 'The Young Turks',
2583 'uploader_id': 'TheYoungTurks',
2584 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2585 'upload_date': '20150715',
2586 'license': 'Standard YouTube License',
2587 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2588 'categories': ['News & Politics'],
2589 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2590 'like_count': int,
2591 'dislike_count': int,
2592 },
2593 'params': {
2594 'skip_download': True,
2595 },
2596 'only_matching': True,
2597 }, {
2598 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2599 'only_matching': True,
2600 }, {
2601 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2602 'only_matching': True,
3d3dddc9 2603 }, {
2604 'url': 'https://www.youtube.com/feed/trending',
2605 'only_matching': True,
2606 }, {
2607 # needs auth
2608 'url': 'https://www.youtube.com/feed/library',
2609 'only_matching': True,
2610 }, {
2611 # needs auth
2612 'url': 'https://www.youtube.com/feed/history',
2613 'only_matching': True,
2614 }, {
2615 # needs auth
2616 'url': 'https://www.youtube.com/feed/subscriptions',
2617 'only_matching': True,
2618 }, {
2619 # needs auth
2620 'url': 'https://www.youtube.com/feed/watch_later',
2621 'only_matching': True,
2622 }, {
2623 # no longer available?
2624 'url': 'https://www.youtube.com/feed/recommended',
2625 'only_matching': True,
29f7c58a 2626 }, {
2627 # inline playlist with not always working continuations
2628 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2629 'only_matching': True,
2630 }, {
2631 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2632 'only_matching': True,
2633 }, {
2634 'url': 'https://www.youtube.com/course',
2635 'only_matching': True,
2636 }, {
2637 'url': 'https://www.youtube.com/zsecurity',
2638 'only_matching': True,
2639 }, {
2640 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2641 'only_matching': True,
2642 }, {
2643 'url': 'https://www.youtube.com/TheYoungTurks/live',
2644 'only_matching': True,
2645 }]
2646
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; for everything else
    the decision is delegated to the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2651
def _extract_channel_id(self, webpage):
    """Return the channel ID found in *webpage*.

    The ``channelId`` meta tag is tried first.  If it is missing, a channel
    URL is looked up in a series of URL-carrying meta tags and the ID is
    taken from the ``/channel/<id>`` part of that URL.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the capture group: a repeated group
    # ("(...)+" ) only keeps its last repetition, i.e. a single character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2664
8bdd16b4 2665 @staticmethod
cd7c66cf 2666 def _extract_basic_item_renderer(item):
2667 # Modified from _extract_grid_item_renderer
2668 known_renderers = (
e3c07697 2669 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2670 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2671 )
2672 for key, renderer in item.items():
2673 if key not in known_renderers:
2674 continue
2675 return renderer
8bdd16b4 2676
8bdd16b4 2677 def _grid_entries(self, grid_renderer):
2678 for item in grid_renderer['items']:
2679 if not isinstance(item, dict):
39b62db1 2680 continue
cd7c66cf 2681 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2682 if not isinstance(renderer, dict):
2683 continue
2684 title = try_get(
2685 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2686 # playlist
2687 playlist_id = renderer.get('playlistId')
2688 if playlist_id:
2689 yield self.url_result(
2690 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2691 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2692 video_title=title)
2693 # video
2694 video_id = renderer.get('videoId')
2695 if video_id:
2696 yield self._extract_video(renderer)
2697 # channel
2698 channel_id = renderer.get('channelId')
2699 if channel_id:
2700 title = try_get(
2701 renderer, lambda x: x['title']['simpleText'], compat_str)
2702 yield self.url_result(
2703 'https://www.youtube.com/channel/%s' % channel_id,
2704 ie=YoutubeTabIE.ie_key(), video_title=title)
2705
3d3dddc9 2706 def _shelf_entries_from_content(self, shelf_renderer):
2707 content = shelf_renderer.get('content')
2708 if not isinstance(content, dict):
8bdd16b4 2709 return
cd7c66cf 2710 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2711 if renderer:
2712 # TODO: add support for nested playlists so each shelf is processed
2713 # as separate playlist
2714 # TODO: this includes only first N items
2715 for entry in self._grid_entries(renderer):
2716 yield entry
2717 renderer = content.get('horizontalListRenderer')
2718 if renderer:
2719 # TODO
2720 pass
8bdd16b4 2721
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first.  With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` yields nothing at all.  Entries embedded in the shelf
    content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2739
8bdd16b4 2740 def _playlist_entries(self, video_list_renderer):
2741 for content in video_list_renderer['contents']:
2742 if not isinstance(content, dict):
2743 continue
2744 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2745 if not isinstance(renderer, dict):
2746 continue
2747 video_id = renderer.get('videoId')
2748 if not video_id:
2749 continue
2750 yield self._extract_video(renderer)
07aeced6 2751
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2759
8bdd16b4 2760 def _video_entry(self, video_renderer):
2761 video_id = video_renderer.get('videoId')
2762 if video_id:
2763 return self._extract_video(video_renderer)
dacb3a86 2764
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) is yielded first, followed by a URL result
    for every link in the post text that YoutubeIE can handle.  A link that
    points at the already-yielded attachment is skipped.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            # Remember the attachment so that the duplicate check below can
            # actually match; previously video_id always stayed None.
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the linked video's own ID, not the attachment's
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2793
8bdd16b4 2794 def _post_thread_continuation_entries(self, post_thread_continuation):
2795 contents = post_thread_continuation.get('contents')
2796 if not isinstance(contents, list):
2797 return
2798 for content in contents:
2799 renderer = content.get('backstagePostThreadRenderer')
2800 if not isinstance(renderer, dict):
2801 continue
2802 for entry in self._post_thread_entries(renderer):
2803 yield entry
07aeced6 2804
29f7c58a 2805 @staticmethod
2806 def _build_continuation_query(continuation, ctp=None):
2807 query = {
2808 'ctoken': continuation,
2809 'continuation': continuation,
2810 }
2811 if ctp:
2812 query['itct'] = ctp
2813 return query
2814
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    The data is read from the first element of ``renderer['continuations']``.
    ``None`` is returned when it is missing or carries no token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2826
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` takes precedence.  Otherwise the renderer's
    ``contents`` and then ``items`` lists are searched for the first
    ``continuationItemRenderer`` that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2849
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield all entries of *tab*, following continuation pages.

    Entries from the tab's initial ``sectionListRenderer`` /
    ``richGridRenderer`` content are yielded first.  Further pages are then
    requested through ``_call_api(ep="browse")`` for as long as a
    continuation can be extracted from the last processed page.

    *item_id* is only used to label the requests.  *identity_token* and
    *account_syncid*, when truthy, are sent as additional request headers.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Besides yielding entries, this stores the continuation it finds in
        # continuation_list[0] of the enclosing scope.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    for page_num in itertools.count(1):
        if not continuation:
            break

        # 'itct' is only present when click tracking params were found, so
        # it must not be indexed unconditionally.
        query = {'continuation': continuation['continuation']}
        ctp = continuation.get('itct')
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        response = None  # stays None if the retry loop never runs
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        # First response shape: renderers keyed under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that a continuation set by extract_entries can be
            # told apart from a stale one
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a flat 'continuationItems' list.  The type
        # of its first item selects the handler and the key under which the
        # whole list is wrapped for that handler.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        continuation_items = try_get(
            response,
            lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3004
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3012
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict with any of ``uploader``, ``uploader_id`` and
    ``uploader_url`` whose value is not None.  If several sidebar items
    name an owner, the last one wins.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((field, value) for field, value in found.items() if value is not None)
8bdd16b4 3035
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``.  The playlist ID is the channel's
    ``externalId`` when available and *item_id* otherwise.  The selected
    tab's title is appended to the playlist title.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    title = description = None
    tags = []
    raw_thumbnails = []

    channel_meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_meta:
        channel_name = channel_meta.get('title')
        channel_url = channel_meta.get('channelUrl')
        channel_id = channel_meta.get('externalId')

    # Channel metadata wins; playlist metadata is only the fallback
    page_meta = channel_meta or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if page_meta:
        title = page_meta.get('title')
        description = page_meta.get('description', '')
        tags = page_meta.get('keywords', '').split()
        raw_thumbnails = (
            try_get(page_meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        if not isinstance(thumb, dict):
            continue
        thumb_url = url_or_none(thumb.get('url'))
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    playlist_id = item_id if channel_id is None else channel_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # Not a channel page: take uploader details from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3105
cd7c66cf 3106 def _extract_mix_playlist(self, playlist, playlist_id):
2be71994 3107 first_id = last_id = None
3108 for page_num in itertools.count(1):
cd7c66cf 3109 videos = list(self._playlist_entries(playlist))
3110 if not videos:
3111 return
2be71994 3112 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3113 if start >= len(videos):
3114 return
3115 for video in videos[start:]:
3116 if video['id'] == first_id:
3117 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3118 return
3119 yield video
3120 first_id = first_id or videos[0]['id']
3121 last_id = videos[-1]['id']
cd7c66cf 3122
cd7c66cf 3123 _, data = self._extract_webpage(
2be71994 3124 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
cd7c66cf 3125 '%s page %d' % (playlist_id, page_num))
3126 playlist = try_get(
3127 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3128
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for the playlist embedded in a watch page.

    If the playlist's endpoint resolves to a URL different from *url*, a
    URL result pointing at it is returned.  Otherwise the playlist is
    extracted in place as a mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_path = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    if not playlist_url or playlist_url == url:
        # Mix playlists have no separate playlist page to delegate to
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3146
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in *data* and raise on errors.

    Alerts whose type is not ``error`` are reported as warnings.  If there
    are error alerts, all but the last are reported as warnings and the
    last one is raised as an ExtractorError with the given *expected* flag.
    """

    def _real_extract_alerts():
        # Yields (alert_type, message) for each message of each alert
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed alerts instead of failing on .get
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if message:
                    yield alert_type, message
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    message = try_get(run, lambda x: x['text'], compat_str)
                    if message:
                        yield alert_type, message

    err_msg = None
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            if err_msg:
                # Only the last error is raised; earlier ones are downgraded
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
02ced43c 3176
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, initial_data)``.

    The download is repeated, up to the ``extractor_retries`` parameter
    (default 3), while the initial data has neither ``contents`` nor
    ``currentVideoEndpoint``.  Alerts found in the data are processed on
    every attempt.  If the data is still incomplete after the last retry,
    an error is reported and the last result is returned anyway.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    incomplete_msg = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        note = 'Downloading webpage'
        if attempt:
            self.report_warning('%s. Retrying ...' % incomplete_msg)
            note += ' (retry #%d)' % attempt
        webpage = self._download_webpage(url, item_id, note)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        is_complete = data.get('contents') or data.get('currentVideoEndpoint')
        if is_complete:
            break
        if attempt >= max_retries:
            self._downloader.report_error(incomplete_msg)
    return webpage, data
3197
def _real_extract(self, url):
    """Dispatch a tab/playlist/watch URL to the matching extraction path.

    The host is normalised to www.youtube.com, bare channel/user URLs are
    redirected to their /videos tab, and watch URLs are resolved to either
    a single video or a playlist depending on the query string and the
    `noplaylist` option.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    tab_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = tab_match.groupdict() if tab_match else {}
    not_channel = groups.get('not_channel')
    if groups and not not_channel:
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    is_watch_url = (not_channel or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning(
            'A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(
            'Downloading playlist %s; add --no-playlist to just download video %s'
            % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # Browse-style pages (channels, playlists, feeds) expose a list of tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # Watch pages opened with a playlist carry the playlist inline
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fall back to the single video, preferring the id reported by the page
    endpoint_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = endpoint_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning(
        'Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3251
c5e8d7af 3252
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs or bare playlist ids and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE accepts is left to it
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Keep the original query when there is one; otherwise (e.g. a bare
        # playlist id) build the query from the matched id
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        query = query or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3327
3328
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be links that carry a playlist id into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # Delegated to YoutubeTabIE, which receives both the video and list ids
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3367
3368
class YoutubeYtUserIE(InfoExtractor):
    """Expand the "ytuser:<name>" shorthand into a /user/<name> URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3382
b05654f0 3383
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ":ytfav" family of keywords to the "LL" playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3401
3402
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor for the "ytsearch" keyword, paging through the search API."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n extracted videos for the query, following continuations."""
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=payload)
            if not response:
                return

            # The first path is tried for the initial page layout, the
            # second for continuation responses
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3471
c9ae7b95 3472
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' value of the search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3478
c9ae7b95 3479
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search from a youtube.com/results URL instead of a keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern above as-is
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        # The 'sp' value, when present, is forwarded as the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3505
3506
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute; it is used for
    both the extractor name and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3526
3527
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" keyword to the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3540
3541
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare home page URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3556
1ed5b5c9 3557
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed (":ytsubs" keyword)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3569
1ed5b5c9 3570
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed (":ythis" / ":ythistory" keywords)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3579
3580
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs without a video id and explain the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; always fail with a hint
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3628
3629
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose video id is only 1 to 10 characters long and reject them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fails: the matched id is shorter than the 11 characters
        # the other patterns in this file require
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)