]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[la7] Add podcasts and podcast playlists (#198)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
8a784c74 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 15from ..compat import (
edf3e38e 16 compat_chr,
29f7c58a 17 compat_HTTPError,
8d81f3e3 18 compat_kwargs,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
c5e8d7af 31 ExtractorError,
b60419c5 32 format_field,
2d30521a 33 float_or_none,
dd27fd17 34 int_or_none,
94278f72 35 mimetype2ext,
6310acf5 36 parse_codecs,
7c80519c 37 parse_duration,
dca3ff4a 38 qualities,
3995d37d 39 remove_start,
cf7e015f 40 smuggle_url,
dbdaaa23 41 str_or_none,
c93d53f5 42 str_to_int,
556dbe7f 43 try_get,
c5e8d7af
PH
44 unescapeHTML,
45 unified_strdate,
cf7e015f 46 unsmuggle_url,
8bdd16b4 47 update_url_query,
21c340b8 48 url_or_none,
6e6bc8da 49 urlencode_postdata,
8bdd16b4 50 urljoin,
c5e8d7af
PH
51)
52
5f6a1245 53
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints used by the Google sign-in flow implemented in _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names (not used inside this class itself)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list with one url_result entry (ie 'Youtube') per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus the JSON-encoded f_req to url."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the rest parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is applied to either message;
            # '%' binds tighter than the conditional expression.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password message above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Call the parent implementation with a private copy of the 'query' kwarg.

        A missing or None query is replaced by an empty dict so that copying
        never fails and the caller's dict is never shared.
        """
        query = (kwargs.get('query') or {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Attempt login when a downloader is attached; the result is not acted upon."""
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
    # Base JSON body for _call_api(); the caller's query is merged on top of it
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': _YT_WEB_CLIENT_VERSION,
            }
        },
    }

    _DEFAULT_BASIC_API_HEADERS = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build an Authorization header value from the SAPISID cookie.

        The value has the form 'SAPISIDHASH <time>_<sha1>', where the SHA-1 is
        taken over '<time> <SAPISID> https://www.youtube.com'.
        Returns None when no SAPISID cookie is set for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        The body is _DEFAULT_API_DATA updated with query. Authorization and
        X-Origin headers are added when a SAPISIDHASH value can be generated.
        The caller's headers dict is copied, never modified.
        Returns whatever _download_json returns.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)
        # Copy so that the additions below do not leak into the caller's dict
        headers = dict(headers or {})
        headers.update({'content-type': 'application/json'})
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=headers,
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData object in webpage and return it parsed as JSON.

        The pattern followed by a boundary is tried first, then the bare pattern.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """Extract syncId required to download private playlists of secondary channels"""
        sync_ids = (
            try_get(data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
            or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the argument of the first 'ytcfg.set({...});' call in webpage.

        Falls back to parsing '{}' when no such call is found; parsing is non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict for YoutubeIE from a renderer dict.

        Fields that cannot be read from renderer are left as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
383
0c148415 384
360e1ca5 385class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 386 IE_DESC = 'YouTube.com'
bc2ca1bb 387 _INVIDIOUS_SITES = (
388 # invidious-redirect websites
389 r'(?:www\.)?redirect\.invidious\.io',
390 r'(?:(?:www|dev)\.)?invidio\.us',
391 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
392 r'(?:www\.)?invidious\.pussthecat\.org',
393 r'(?:www\.)?invidious\.048596\.xyz',
394 r'(?:www\.)?invidious\.zee\.li',
395 r'(?:www\.)?vid\.puffyan\.us',
396 r'(?:(?:www|au)\.)?ytprivate\.com',
397 r'(?:www\.)?invidious\.namazso\.eu',
398 r'(?:www\.)?invidious\.ethibox\.fr',
399 r'(?:www\.)?inv\.skyn3t\.in',
400 r'(?:www\.)?invidious\.himiko\.cloud',
401 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
402 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
403 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
404 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
405 # youtube-dl invidious instances list
406 r'(?:(?:www|no)\.)?invidiou\.sh',
407 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
408 r'(?:www\.)?invidious\.kabi\.tk',
409 r'(?:www\.)?invidious\.13ad\.de',
410 r'(?:www\.)?invidious\.mastodon\.host',
411 r'(?:www\.)?invidious\.zapashcanon\.fr',
412 r'(?:www\.)?invidious\.kavin\.rocks',
413 r'(?:www\.)?invidious\.tube',
414 r'(?:www\.)?invidiou\.site',
415 r'(?:www\.)?invidious\.site',
416 r'(?:www\.)?invidious\.xyz',
417 r'(?:www\.)?invidious\.nixnet\.xyz',
418 r'(?:www\.)?invidious\.drycat\.fr',
419 r'(?:www\.)?tube\.poal\.co',
420 r'(?:www\.)?tube\.connect\.cafe',
421 r'(?:www\.)?vid\.wxzm\.sx',
422 r'(?:www\.)?vid\.mint\.lgbt',
423 r'(?:www\.)?yewtu\.be',
424 r'(?:www\.)?yt\.elukerio\.org',
425 r'(?:www\.)?yt\.lelux\.fi',
426 r'(?:www\.)?invidious\.ggc-project\.de',
427 r'(?:www\.)?yt\.maisputain\.ovh',
428 r'(?:www\.)?invidious\.toot\.koeln',
429 r'(?:www\.)?invidious\.fdn\.fr',
430 r'(?:www\.)?watch\.nettohikari\.com',
431 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
432 r'(?:www\.)?qklhadlycap4cnod\.onion',
433 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
434 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
435 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
436 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
437 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
438 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
439 )
cb7dfeea 440 _VALID_URL = r"""(?x)^
c5e8d7af 441 (
edb53e2d 442 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 443 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
444 (?:www\.)?deturl\.com/www\.youtube\.com|
445 (?:www\.)?pwnyoutube\.com|
446 (?:www\.)?hooktube\.com|
447 (?:www\.)?yourepeat\.com|
448 tube\.majestyc\.net|
449 %(invidious)s|
450 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
451 (?:.*?\#/)? # handle anchor (#/) redirect urls
452 (?: # the various things that can precede the ID:
ac7553d0 453 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 454 |(?: # or the v= param in all its forms
f7000f3a 455 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 456 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 457 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
458 v=
459 )
f4b05232 460 ))
cbaed4bb
S
461 |(?:
462 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
463 vid\.plus| # or vid.plus/xxxx
464 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 465 %(invidious)s
cbaed4bb 466 )/
edb53e2d 467 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 468 )
c5e8d7af 469 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 470 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
471 (?!.*?\blist=
472 (?:
473 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
474 WL # WL are handled by the watch later IE
475 )
476 )
c5e8d7af 477 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 478 $""" % {
479 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
480 'invidious': '|'.join(_INVIDIOUS_SITES),
481 }
e40c758c 482 _PLAYER_INFO_RE = (
cc2db878 483 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
484 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 485 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 486 )
2c62dc26 487 _formats = {
c2d3cb4c 488 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
489 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
490 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
491 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
492 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
493 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
494 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
495 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 496 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 497 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
498 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
499 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
500 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
501 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
502 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 503 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 504 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
505 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 506
507
508 # 3D videos
c2d3cb4c 509 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
510 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
511 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
512 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 513 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
514 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
515 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 516
96fb5605 517 # Apple HTTP Live Streaming
11f12195 518 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 519 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
520 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
521 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
522 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
523 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 524 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
525 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
526
527 # DASH mp4 video
d23028a8
S
528 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
529 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
530 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
531 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
532 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 533 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
534 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
535 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
536 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
537 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
538 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
539 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 540
f6f1fc92 541 # Dash mp4 audio
d23028a8
S
542 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
543 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
544 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
545 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
546 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
547 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
548 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
549
550 # Dash webm
d23028a8
S
551 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
552 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
553 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
554 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
555 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
556 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
557 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
558 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
559 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
560 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
561 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
562 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
563 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
564 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
565 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 566 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
567 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
568 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
569 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
570 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
571 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
572 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
573
574 # Dash webm audio
d23028a8
S
575 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
576 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 577
0857baad 578 # Dash webm audio with opus inside
d23028a8
S
579 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
580 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
581 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 582
ce6b9a2d
PH
583 # RTMP (unnamed)
584 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
585
586 # av01 video only formats sometimes served with "unknown" codecs
587 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
588 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
589 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
590 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 591 }
29f7c58a 592 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 593
fd5c4aab
S
594 _GEO_BYPASS = False
595
78caa52a 596 IE_NAME = 'youtube'
2eb88d95
PH
597 _TESTS = [
598 {
2d3d2997 599 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
600 'info_dict': {
601 'id': 'BaW_jenozKc',
602 'ext': 'mp4',
3867038a 603 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
604 'uploader': 'Philipp Hagemeister',
605 'uploader_id': 'phihag',
ec85ded8 606 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
607 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
608 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 609 'upload_date': '20121002',
3867038a 610 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 611 'categories': ['Science & Technology'],
3867038a 612 'tags': ['youtube-dl'],
556dbe7f 613 'duration': 10,
dbdaaa23 614 'view_count': int,
3e7c1224
PH
615 'like_count': int,
616 'dislike_count': int,
7c80519c 617 'start_time': 1,
297a564b 618 'end_time': 9,
2eb88d95 619 }
0e853ca4 620 },
fccd3771 621 {
4bc3a23e
PH
622 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
623 'note': 'Embed-only video (#1746)',
624 'info_dict': {
625 'id': 'yZIXLfi8CZQ',
626 'ext': 'mp4',
627 'upload_date': '20120608',
628 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
629 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
630 'uploader': 'SET India',
94bfcd23 631 'uploader_id': 'setindia',
ec85ded8 632 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 633 'age_limit': 18,
545cc85d 634 },
635 'skip': 'Private video',
fccd3771 636 },
11b56058 637 {
8bdd16b4 638 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
639 'note': 'Use the first video ID in the URL',
640 'info_dict': {
641 'id': 'BaW_jenozKc',
642 'ext': 'mp4',
3867038a 643 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
644 'uploader': 'Philipp Hagemeister',
645 'uploader_id': 'phihag',
ec85ded8 646 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 647 'upload_date': '20121002',
3867038a 648 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 649 'categories': ['Science & Technology'],
3867038a 650 'tags': ['youtube-dl'],
556dbe7f 651 'duration': 10,
dbdaaa23 652 'view_count': int,
11b56058
PM
653 'like_count': int,
654 'dislike_count': int,
34a7de29
S
655 },
656 'params': {
657 'skip_download': True,
658 },
11b56058 659 },
dd27fd17 660 {
2d3d2997 661 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
662 'note': '256k DASH audio (format 141) via DASH manifest',
663 'info_dict': {
664 'id': 'a9LDPn-MO4I',
665 'ext': 'm4a',
666 'upload_date': '20121002',
667 'uploader_id': '8KVIDEO',
ec85ded8 668 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
669 'description': '',
670 'uploader': '8KVIDEO',
671 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 672 },
4bc3a23e
PH
673 'params': {
674 'youtube_include_dash_manifest': True,
675 'format': '141',
4919603f 676 },
de3c7fe0 677 'skip': 'format 141 not served anymore',
dd27fd17 678 },
8bdd16b4 679 # DASH manifest with encrypted signature
680 {
681 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
682 'info_dict': {
683 'id': 'IB3lcPjvWLA',
684 'ext': 'm4a',
685 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
686 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
687 'duration': 244,
688 'uploader': 'AfrojackVEVO',
689 'uploader_id': 'AfrojackVEVO',
690 'upload_date': '20131011',
cc2db878 691 'abr': 129.495,
8bdd16b4 692 },
693 'params': {
694 'youtube_include_dash_manifest': True,
695 'format': '141/bestaudio[ext=m4a]',
696 },
697 },
aa79ac0c
PH
698 # Controversy video
699 {
700 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
701 'info_dict': {
702 'id': 'T4XJQO3qol8',
703 'ext': 'mp4',
556dbe7f 704 'duration': 219,
aa79ac0c 705 'upload_date': '20100909',
4fe54c12 706 'uploader': 'Amazing Atheist',
aa79ac0c 707 'uploader_id': 'TheAmazingAtheist',
ec85ded8 708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 709 'title': 'Burning Everyone\'s Koran',
545cc85d 710 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 711 }
c522adb1 712 },
dd2d55f1 713 # Normal age-gate video (embed allowed)
c522adb1 714 {
2d3d2997 715 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
716 'info_dict': {
717 'id': 'HtVdAasjOgU',
718 'ext': 'mp4',
719 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 720 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 721 'duration': 142,
c522adb1
JMF
722 'uploader': 'The Witcher',
723 'uploader_id': 'WitcherGame',
ec85ded8 724 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 725 'upload_date': '20140605',
34952f09 726 'age_limit': 18,
c522adb1
JMF
727 },
728 },
8bdd16b4 729 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
730 # YouTube Red ad is not captured for creator
731 {
732 'url': '__2ABJjxzNo',
733 'info_dict': {
734 'id': '__2ABJjxzNo',
735 'ext': 'mp4',
736 'duration': 266,
737 'upload_date': '20100430',
738 'uploader_id': 'deadmau5',
739 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 740 'creator': 'deadmau5',
741 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 742 'uploader': 'deadmau5',
743 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 744 'alt_title': 'Some Chords',
8bdd16b4 745 },
746 'expected_warnings': [
747 'DASH manifest missing',
748 ]
749 },
067aa17e 750 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
751 {
752 'url': 'lqQg6PlCWgI',
753 'info_dict': {
754 'id': 'lqQg6PlCWgI',
755 'ext': 'mp4',
556dbe7f 756 'duration': 6085,
90227264 757 'upload_date': '20150827',
cbe2bd91 758 'uploader_id': 'olympic',
ec85ded8 759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 760 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 761 'uploader': 'Olympic',
cbe2bd91
PH
762 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
763 },
764 'params': {
765 'skip_download': 'requires avconv',
e52a40ab 766 }
cbe2bd91 767 },
6271f1ca
PH
768 # Non-square pixels
769 {
770 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
771 'info_dict': {
772 'id': '_b-2C3KPAM0',
773 'ext': 'mp4',
774 'stretched_ratio': 16 / 9.,
556dbe7f 775 'duration': 85,
6271f1ca
PH
776 'upload_date': '20110310',
777 'uploader_id': 'AllenMeow',
ec85ded8 778 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 779 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 780 'uploader': '孫ᄋᄅ',
6271f1ca
PH
781 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
782 },
06b491eb
S
783 },
784 # url_encoded_fmt_stream_map is empty string
785 {
786 'url': 'qEJwOuvDf7I',
787 'info_dict': {
788 'id': 'qEJwOuvDf7I',
f57b7835 789 'ext': 'webm',
06b491eb
S
790 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
791 'description': '',
792 'upload_date': '20150404',
793 'uploader_id': 'spbelect',
794 'uploader': 'Наблюдатели Петербурга',
795 },
796 'params': {
797 'skip_download': 'requires avconv',
e323cf3f
S
798 },
799 'skip': 'This live event has ended.',
06b491eb 800 },
067aa17e 801 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
802 {
803 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
804 'info_dict': {
805 'id': 'FIl7x6_3R5Y',
eb6793ba 806 'ext': 'webm',
da77d856
S
807 'title': 'md5:7b81415841e02ecd4313668cde88737a',
808 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 809 'duration': 220,
da77d856
S
810 'upload_date': '20150625',
811 'uploader_id': 'dorappi2000',
ec85ded8 812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 813 'uploader': 'dorappi2000',
eb6793ba 814 'formats': 'mincount:31',
da77d856 815 },
eb6793ba 816 'skip': 'not actual anymore',
2ee8f5d8 817 },
8a1a26ce
YCH
818 # DASH manifest with segment_list
819 {
820 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
821 'md5': '8ce563a1d667b599d21064e982ab9e31',
822 'info_dict': {
823 'id': 'CsmdDsKjzN8',
824 'ext': 'mp4',
17ee98e1 825 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
826 'uploader': 'Airtek',
827 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
828 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
829 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
830 },
831 'params': {
832 'youtube_include_dash_manifest': True,
833 'format': '135', # bestvideo
be49068d
S
834 },
835 'skip': 'This live event has ended.',
2ee8f5d8 836 },
cf7e015f
S
837 {
838 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 839 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 840 'info_dict': {
545cc85d 841 'id': 'jvGDaLqkpTg',
842 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
843 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
844 },
845 'playlist': [{
846 'info_dict': {
545cc85d 847 'id': 'jvGDaLqkpTg',
cf7e015f 848 'ext': 'mp4',
545cc85d 849 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
850 'description': 'md5:e03b909557865076822aa169218d6a5d',
851 'duration': 10643,
852 'upload_date': '20161111',
853 'uploader': 'Team PGP',
854 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
855 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
856 },
857 }, {
858 'info_dict': {
545cc85d 859 'id': '3AKt1R1aDnw',
cf7e015f 860 'ext': 'mp4',
545cc85d 861 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
862 'description': 'md5:e03b909557865076822aa169218d6a5d',
863 'duration': 10991,
864 'upload_date': '20161111',
865 'uploader': 'Team PGP',
866 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
867 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
868 },
869 }, {
870 'info_dict': {
545cc85d 871 'id': 'RtAMM00gpVc',
cf7e015f 872 'ext': 'mp4',
545cc85d 873 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
874 'description': 'md5:e03b909557865076822aa169218d6a5d',
875 'duration': 10995,
876 'upload_date': '20161111',
877 'uploader': 'Team PGP',
878 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
879 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
880 },
881 }, {
882 'info_dict': {
545cc85d 883 'id': '6N2fdlP3C5U',
cf7e015f 884 'ext': 'mp4',
545cc85d 885 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
886 'description': 'md5:e03b909557865076822aa169218d6a5d',
887 'duration': 10990,
888 'upload_date': '20161111',
889 'uploader': 'Team PGP',
890 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
891 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
892 },
893 }],
894 'params': {
895 'skip_download': True,
896 },
cbaed4bb 897 },
f9f49d87 898 {
067aa17e 899 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
900 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
901 'info_dict': {
902 'id': 'gVfLd0zydlo',
903 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
904 },
905 'playlist_count': 2,
be49068d 906 'skip': 'Not multifeed anymore',
f9f49d87 907 },
cbaed4bb 908 {
2d3d2997 909 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 910 'only_matching': True,
0e49d9a6 911 },
6d4fc66b 912 {
2d3d2997 913 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
914 'only_matching': True,
915 },
0e49d9a6 916 {
067aa17e 917 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 918 # Also tests cut-off URL expansion in video description (see
067aa17e
S
919 # https://github.com/ytdl-org/youtube-dl/issues/1892,
920 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
921 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
922 'info_dict': {
923 'id': 'lsguqyKfVQg',
924 'ext': 'mp4',
925 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 926 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 927 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 928 'duration': 133,
0e49d9a6
LL
929 'upload_date': '20151119',
930 'uploader_id': 'IronSoulElf',
ec85ded8 931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 932 'uploader': 'IronSoulElf',
eb6793ba
S
933 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
934 'track': 'Dark Walk - Position Music',
935 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 936 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
937 },
938 'params': {
939 'skip_download': True,
940 },
941 },
61f92af1 942 {
067aa17e 943 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
944 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
945 'only_matching': True,
946 },
313dfc45
LL
947 {
948 # Video with yt:stretch=17:0
949 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
950 'info_dict': {
951 'id': 'Q39EVAstoRM',
952 'ext': 'mp4',
953 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
954 'description': 'md5:ee18a25c350637c8faff806845bddee9',
955 'upload_date': '20151107',
956 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
957 'uploader': 'CH GAMER DROID',
958 },
959 'params': {
960 'skip_download': True,
961 },
be49068d 962 'skip': 'This video does not exist.',
313dfc45 963 },
7caf9830
S
964 {
965 # Video licensed under Creative Commons
966 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
967 'info_dict': {
968 'id': 'M4gD1WSo5mA',
969 'ext': 'mp4',
970 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
971 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 972 'duration': 721,
7caf9830
S
973 'upload_date': '20150127',
974 'uploader_id': 'BerkmanCenter',
ec85ded8 975 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 976 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
977 'license': 'Creative Commons Attribution license (reuse allowed)',
978 },
979 'params': {
980 'skip_download': True,
981 },
982 },
fd050249
S
983 {
984 # Channel-like uploader_url
985 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
986 'info_dict': {
987 'id': 'eQcmzGIKrzg',
988 'ext': 'mp4',
989 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 990 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 991 'duration': 4060,
fd050249 992 'upload_date': '20151119',
eb6793ba 993 'uploader': 'Bernie Sanders',
fd050249 994 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
996 'license': 'Creative Commons Attribution license (reuse allowed)',
997 },
998 'params': {
999 'skip_download': True,
1000 },
1001 },
040ac686
S
1002 {
1003 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1004 'only_matching': True,
7f29cf54
S
1005 },
1006 {
067aa17e 1007 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1008 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1009 'only_matching': True,
6496ccb4
S
1010 },
1011 {
1012 # Rental video preview
1013 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1014 'info_dict': {
1015 'id': 'uGpuVWrhIzE',
1016 'ext': 'mp4',
1017 'title': 'Piku - Trailer',
1018 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1019 'upload_date': '20150811',
1020 'uploader': 'FlixMatrix',
1021 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1023 'license': 'Standard YouTube License',
1024 },
1025 'params': {
1026 'skip_download': True,
1027 },
eb6793ba 1028 'skip': 'This video is not available.',
022a5d66 1029 },
12afdc2a
S
1030 {
1031 # YouTube Red video with episode data
1032 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1033 'info_dict': {
1034 'id': 'iqKdEhx-dD4',
1035 'ext': 'mp4',
1036 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1037 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1038 'duration': 2085,
12afdc2a
S
1039 'upload_date': '20170118',
1040 'uploader': 'Vsauce',
1041 'uploader_id': 'Vsauce',
1042 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1043 'series': 'Mind Field',
1044 'season_number': 1,
1045 'episode_number': 1,
1046 },
1047 'params': {
1048 'skip_download': True,
1049 },
1050 'expected_warnings': [
1051 'Skipping DASH manifest',
1052 ],
1053 },
c7121fa7
S
1054 {
1055 # The following content has been identified by the YouTube community
1056 # as inappropriate or offensive to some audiences.
1057 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1058 'info_dict': {
1059 'id': '6SJNVb0GnPI',
1060 'ext': 'mp4',
1061 'title': 'Race Differences in Intelligence',
1062 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1063 'duration': 965,
1064 'upload_date': '20140124',
1065 'uploader': 'New Century Foundation',
1066 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1068 },
1069 'params': {
1070 'skip_download': True,
1071 },
545cc85d 1072 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1073 },
022a5d66
S
1074 {
1075 # itag 212
1076 'url': '1t24XAntNCY',
1077 'only_matching': True,
fd5c4aab
S
1078 },
1079 {
1080 # geo restricted to JP
1081 'url': 'sJL6WA-aGkQ',
1082 'only_matching': True,
1083 },
cd5a74a2
S
1084 {
1085 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1086 'only_matching': True,
1087 },
bc2ca1bb 1088 {
1089 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1090 'only_matching': True,
1091 },
1092 {
1093 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1094 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1095 'only_matching': True,
1096 },
825cd268
RA
1097 {
1098 # DRM protected
1099 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1100 'only_matching': True,
4fe54c12
S
1101 },
1102 {
1103 # Video with unsupported adaptive stream type formats
1104 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1105 'info_dict': {
1106 'id': 'Z4Vy8R84T1U',
1107 'ext': 'mp4',
1108 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1109 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1110 'duration': 433,
1111 'upload_date': '20130923',
1112 'uploader': 'Amelia Putri Harwita',
1113 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1115 'formats': 'maxcount:10',
1116 },
1117 'params': {
1118 'skip_download': True,
1119 'youtube_include_dash_manifest': False,
1120 },
5429d6a9 1121 'skip': 'not actual anymore',
5caabd3c 1122 },
1123 {
822b9d9c 1124 # Youtube Music Auto-generated description
5caabd3c 1125 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1126 'info_dict': {
1127 'id': 'MgNrAu2pzNs',
1128 'ext': 'mp4',
1129 'title': 'Voyeur Girl',
1130 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1131 'upload_date': '20190312',
5429d6a9
S
1132 'uploader': 'Stephen - Topic',
1133 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1134 'artist': 'Stephen',
1135 'track': 'Voyeur Girl',
1136 'album': 'it\'s too much love to know my dear',
1137 'release_date': '20190313',
1138 'release_year': 2019,
1139 },
1140 'params': {
1141 'skip_download': True,
1142 },
1143 },
66b48727
RA
1144 {
1145 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1146 'only_matching': True,
1147 },
011e75e6
S
1148 {
1149 # invalid -> valid video id redirection
1150 'url': 'DJztXj2GPfl',
1151 'info_dict': {
1152 'id': 'DJztXj2GPfk',
1153 'ext': 'mp4',
1154 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1155 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1156 'upload_date': '20090125',
1157 'uploader': 'Prochorowka',
1158 'uploader_id': 'Prochorowka',
1159 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1160 'artist': 'Panjabi MC',
1161 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1162 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1163 },
1164 'params': {
1165 'skip_download': True,
1166 },
545cc85d 1167 'skip': 'Video unavailable',
ea74e00b
DP
1168 },
1169 {
1170 # empty description results in an empty string
1171 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1172 'info_dict': {
1173 'id': 'x41yOUIvK2k',
1174 'ext': 'mp4',
1175 'title': 'IMG 3456',
1176 'description': '',
1177 'upload_date': '20170613',
1178 'uploader_id': 'ElevageOrVert',
1179 'uploader': 'ElevageOrVert',
1180 },
1181 'params': {
1182 'skip_download': True,
1183 },
1184 },
a0566bbf 1185 {
29f7c58a 1186 # with '};' inside yt initial data (see [1])
1187 # see [2] for an example with '};' inside ytInitialPlayerResponse
1188 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1189 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1190 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1191 'info_dict': {
1192 'id': 'CHqg6qOn4no',
1193 'ext': 'mp4',
1194 'title': 'Part 77 Sort a list of simple types in c#',
1195 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1196 'upload_date': '20130831',
1197 'uploader_id': 'kudvenkat',
1198 'uploader': 'kudvenkat',
1199 },
1200 'params': {
1201 'skip_download': True,
1202 },
1203 },
29f7c58a 1204 {
1205 # another example of '};' in ytInitialData
1206 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1207 'only_matching': True,
1208 },
1209 {
1210 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1211 'only_matching': True,
1212 },
545cc85d 1213 {
cc2db878 1214 # https://github.com/ytdl-org/youtube-dl/pull/28094
1215 'url': 'OtqTfy26tG0',
1216 'info_dict': {
1217 'id': 'OtqTfy26tG0',
1218 'ext': 'mp4',
1219 'title': 'Burn Out',
1220 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1221 'upload_date': '20141120',
1222 'uploader': 'The Cinematic Orchestra - Topic',
1223 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1224 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1225 'artist': 'The Cinematic Orchestra',
1226 'track': 'Burn Out',
1227 'album': 'Every Day',
1228 'release_data': None,
1229 'release_year': None,
1230 },
1231 'params': {
1232 'skip_download': True,
1233 },
545cc85d 1234 },
bc2ca1bb 1235 {
1236 # controversial video, only works with bpctr when authenticated with cookies
1237 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1238 'only_matching': True,
1239 },
2eb88d95
PH
1240 ]
1241
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and its per-instance caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = {}
    # Downloaded player code, keyed by player id
    self._code_cache = {}
e0df6211 1246
60064c53
PH
1247 def _signature_cache_id(self, example_sig):
1248 """ Return a string representation of a signature """
78caa52a 1249 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1250
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id embedded in ``player_url``.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order; the ``id``
    group of the first pattern that matches is returned.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1260
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like ``example_sig``.

    The result is looked up in the downloader cache first (section
    'youtube-sigfuncs'). On a miss the player code is downloaded (at most
    once per player id per extractor instance), the signature function is
    parsed out of it, and the index permutation it applies is stored in the
    cache for later runs.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # NOTE(review): presumably guards against path separators because the id
    # is used as a cache file name - confirm against the cache implementation
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def apply_stored_spec(s):
            # Re-order the characters of s according to the cached indices
            return ''.join(s[i] for i in stored_spec)
        return apply_stored_spec

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose n-th character has code point n through the
    # function; the code points of the output are then the source indices.
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    fresh_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, fresh_spec)
    return sig_func
1287
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    ``func`` is applied to a probe string whose n-th character has code
    point n, so the code points of its output are the source indices. Those
    indices are compressed into slice/index expressions on ``s`` and written
    out via ``self.to_screen``, guarded by a check on the part lengths of
    ``example_sig``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            # Build the slice expression covering start..end (inclusive)
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs and
        # its loop variable would be unbound afterwards, so handle it here.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                # Run ended at prev: emit it and start over
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of a new ascending/descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1326
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in ``jscode``.

    The function name is located with the first matching pattern below, the
    function is extracted with JSInterpreter, and the returned callable
    passes its single string argument to it as a one-element argument list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_sig_func = interpreter.extract_function(sig_func_name)

    def run_sig_func(s):
        # The extracted function takes its arguments as a list
        return js_sig_func([s])

    return run_sig_func
1350
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    ``player_url`` is made absolute first. The signature function is cached
    per (player URL, signature shape) on this instance.

    Raises ExtractorError if ``player_url`` is None, or if anything goes
    wrong while obtaining or applying the signature function (the original
    exception is attached as ``cause``).
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and site-relative player URLs
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as err:
        # Include the full traceback in the message
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=err)
e0df6211 1377
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in ``player_response``.

    The URL is read from
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``; if it is missing or
    not a valid URL nothing happens. The ``ver`` and ``cpn`` query
    parameters are set before the request. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # 16 characters drawn uniformly from the 64-character alphabet.
    # (randint(0, 256) & 63 was slightly biased: randint includes 256.)
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1402
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in ``webpage`` as a list of strings.

    Three kinds of embeds are collected, in this order: player URLs in
    iframe/embed/object/SWF markup, ``data-youtube-id`` values of lazyYT
    elements, and ``data-video_id`` values of the Wordpress
    "YouTube Video Importer" plugin.
    """
    found = []

    # Embedded YouTube player
    # NOTE(review): the `embedSWF\(?:\s*` alternative requires a literal ':'
    # after an optional '(' - confirm this is what is intended
    for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage):
        found.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    for mobj in re.finditer(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the unnamed group holding the video id
        found.append(mobj.group(3))

    return found
1434
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by ``_extract_urls``, or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
1439
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched on ``url``.

    Raises ExtractorError if ``url`` does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1447
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in ``data``.

    Returns a list of dicts with ``start_time``, ``end_time`` and ``title``,
    or None when ``data`` holds no non-empty chapter list. A chapter ends
    where the next one starts; the last chapter ends at ``duration``.
    Chapters whose start or end time cannot be determined are skipped.
    ``video_id`` is not used here.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(entry):
        # The start offset is given in milliseconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for idx, entry in enumerate(raw_chapters):
        begin = start_seconds(entry)
        if begin is None:
            continue
        if idx + 1 < total:
            finish = start_seconds(raw_chapters[idx + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1487
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by ``regex`` in ``webpage`` and parse it.

    ``regex`` followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then ``regex`` alone. When nothing matches, ``'{}'`` is parsed instead.
    Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1492
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the ``text`` values of the dicts in ``runs``.

    Entries that are not dicts, or whose ``text`` is missing, empty or not a
    string, are ignored. Returns None when no text was found at all.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1506
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; a falsy value is reported as ``'root'``.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def joined_runs(getter):
        # Join the text runs selected by getter; missing runs count as empty
        return self._join_text_entries(try_get(comment_renderer, getter) or [])

    like_count = str_to_int(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'],
         lambda x: x['likeCount']),
        compat_str)) or 0

    return {
        'id': comment_id,
        'text': joined_runs(lambda x: x['contentText']['runs']) or '',
        # TODO: This should be parsed to timestamp
        'time_text': joined_runs(lambda x: x['publishedTimeText']['runs']),
        'like_count': like_count,
        'is_favorited': try_get(
            comment_renderer, lambda x: x['isLiked'], bool),
        'author': try_get(
            comment_renderer, lambda x: x['authorText']['simpleText'], compat_str),
        'author_id': try_get(
            comment_renderer,
            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str),
        'author_thumbnail': try_get(
            comment_renderer,
            lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
1540
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     session_token_list, parent=None, comment_counts=None):
    """Generate comments (and their replies) by following continuations.

    root_continuation_data -- renderer from which the first continuation
                              is taken via YoutubeTabIE._extract_continuation
    identity_token         -- if set, sent as x-youtube-identity-token
    account_syncid         -- if set, sent as X-Goog-PageId (with
                              X-Goog-AuthUser 0)
    session_token_list     -- one-element list holding the session token;
                              element 0 is replaced in place whenever a
                              response carries a new 'xsrf_token'
    parent                 -- id of the parent comment when fetching a
                              reply thread, None for top-level comments
    comment_counts         -- shared mutable list
                              [comments so far, est. total, current thread #];
                              created here when not supplied

    Yields comment dicts as produced by _extract_comment. On the first
    top-level page the estimated total number of comments is additionally
    yielded as an int (when it can be determined).
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type, so it must be the third argument;
            # inside the getter tuple it would be called as dict(src) and
            # hand back the whole source object as if it were the comment.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]
    headers = self._DEFAULT_BASIC_API_HEADERS.copy()

    # TODO: Generalize the download code with TabIE
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = "Downloading initial comment continuation page"
                    else:
                        note_prefix = " Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = "%sDownloading comment%s page %d %s" % (
                        " " if parent else "",
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning("Assumed end of comments (received HTTP Error 413)")
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + " (this API is probably deprecated)"
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError("Invalid or missing params in continuation request", expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    estimated_total = str_to_int(expected_comment_count)
                    # Only report/store the estimate when the text actually
                    # converted to a number; '%d' % None would raise and a
                    # None total would break the progress string above.
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen("Downloading ~%d comments" % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in this response: the
            # continuation would stay unchanged and the same page would be
            # requested again indefinitely, so stop here.
            break
1726
1727 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1728 """Entry for comment extraction"""
1729 comments = []
1730 known_entry_comment_renderers = (
1731 'itemSectionRenderer',
1732 )
1733 estimated_total = 0
1734 for entry in contents:
1735 for key, renderer in entry.items():
1736 if key not in known_entry_comment_renderers:
1737 continue
1738
1739 comment_iter = self._comment_entries(
1740 renderer,
1741 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1742 account_syncid=self._extract_account_syncid(ytcfg),
1743 session_token_list=[xsrf_token])
1744
1745 for comment in comment_iter:
1746 if isinstance(comment, int):
1747 estimated_total = comment
1748 continue
1749 comments.append(comment)
1750 break
1751 self.to_screen("Downloaded %d/%d comments" % (len(comments), estimated_total))
1752 return {
1753 'comments': comments,
1754 'comment_count': len(comments),
1755 }
1756
def _real_extract(self, url):
    """Extract the info dict for a single watch URL.

    Downloads the watch page, obtains the player response (from the page
    or via the 'player' API), and builds formats, thumbnails, subtitles
    and metadata from it. Returns either:
      * a url_result for the trailer when the player response points to one,
      * a playlist_result for multifeed videos (unless --no-playlist or
        'force_singlefeed' was smuggled into the URL), or
      * the info dict of the video.

    Raises ExtractorError when no formats are found and the player
    response gives a reason (or DRM license info is present).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&has_verified=1&bpctr=9999999999',
        video_id, fatal=False)

    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') == 'Sign in to confirm your age':
        pr = self._parse_json(try_get(compat_parse_qs(
            self._download_webpage(
                base_url + 'get_video_info', video_id,
                'Refetching age-gated info webpage',
                'unable to download video info webpage', query={
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                }, fatal=False)),
            lambda x: x['player_response'][0],
            compat_str) or '{}', video_id)
        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text objects carry either 'simpleText' or a list of 'runs'
        if not x:
            return
        return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats = []
    itags = []
    itag_qualities = {}
    player_url = None
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        quality = fmt.get('quality')
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: the URL and its encrypted signature are
            # packed into the 'signatureCipher' query string
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                if not webpage:
                    continue
                player_url = self._search_regex(
                    r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                    webpage, 'player URL', fatal=False)
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
        }
        mimetype = fmt.get('mimeType')
        if mimetype:
            mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
            if mobj:
                dct['ext'] = mimetype2ext(mobj.group(1))
                dct.update(parse_codecs(mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            itag = self._search_regex(
                r'/itag/(\d+)', f['url'], 'itag', default=None)
            if itag:
                f['format_id'] = itag
            formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                # Skip formats already collected from streamingData
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    # Not actually useful since the sorting is already done with "quality,res,fps,codec"
                    # but kept to maintain feature parity (and code similarity) with youtube-dl
                    # Remove if this causes any issues with sorting in future
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # reason can be None when only a subreason is given; plain
            # concatenation would raise TypeError in that case
            if subreason:
                reason = reason + '\n' + subreason if reason else subreason
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            # Tolerate malformed values instead of failing on unpacking/int()
            stretch_mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if stretch_mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in stretch_mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # Neither container provided thumbnails; fall back to page metadata
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    owner_profile_url = microformat.get('ownerProfileUrl')

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # video_details may be empty ({}), so do not index it directly
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, query):
            # One entry per subtitle format, differing only in the 'fmt' query
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                })
            container[lang_code] = lang_subs

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, {})
                continue
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    if not is_live:
        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            # 'subtitles' is only present in info when a caption tracklist
            # exists; create it on demand so live chat is not silently
            # dropped by the KeyError handler below
            info.setdefault('subtitles', {})['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            for engagment_pannel in (initial_data.get('engagementPanels') or []):
                contents = try_get(
                    engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not contents:
                    continue

                def chapter_time(mmlir):
                    # mmlir may be None when the next item lacks a renderer
                    return parse_duration(
                        get_text((mmlir or {}).get('timeDescription')))

                chapters = []
                for next_num, content in enumerate(contents, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    # A chapter ends where the next one starts; the last
                    # chapter ends at the video duration
                    end_time = chapter_time(try_get(
                        contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
                        if next_num < len(contents) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                # get_text() calls .get() on its argument, so the title
                # must be fetched as a dict, not a string
                info['channel'] = get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    mrr_contents_text = get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    if initial_data and is_private is not None:
        is_membersonly = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
        for content in contents or []:
            badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
            for badge in badges or []:
                label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
                if label.lower() == 'members only':
                    is_membersonly = True
                    break
            if is_membersonly:
                break

    # TODO: Add this for playlists
    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=False,  # Youtube no longer have premium-only videos?
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self._downloader.params.get('writeannotations', False)
    get_comments = self._downloader.params.get('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Resolve the watch-next contents explicitly for the deferred
        # callback: the local 'contents' may be unbound (no initial data)
        # or None at this point, and the lambda runs after this method
        # has returned.
        comment_contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, comment_contents, webpage, xsrf_token)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 2350
5f6a1245 2351
8bdd16b4 2352class YoutubeTabIE(YoutubeBaseInfoExtractor):
2353 IE_DESC = 'YouTube.com tab'
70d5c17b 2354 _VALID_URL = r'''(?x)
2355 https?://
2356 (?:\w+\.)?
2357 (?:
2358 youtube(?:kids)?\.com|
2359 invidio\.us
2360 )/
2361 (?:
2362 (?:channel|c|user)/|
2363 (?P<not_channel>
9ba5705a 2364 feed/|hashtag/|
70d5c17b 2365 (?:playlist|watch)\?.*?\blist=
2366 )|
29f7c58a 2367 (?!(?:%s)\b) # Direct URLs
70d5c17b 2368 )
2369 (?P<id>[^/?\#&]+)
2370 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2371 IE_NAME = 'youtube:tab'
2372
81127aa5 2373 _TESTS = [{
8bdd16b4 2374 # playlists, multipage
2375 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2376 'playlist_mincount': 94,
2377 'info_dict': {
2378 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2379 'title': 'Игорь Клейнер - Playlists',
2380 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2381 'uploader': 'Игорь Клейнер',
2382 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2383 },
2384 }, {
2385 # playlists, multipage, different order
2386 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2387 'playlist_mincount': 94,
2388 'info_dict': {
2389 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2390 'title': 'Игорь Клейнер - Playlists',
2391 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2392 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2393 'uploader': 'Игорь Клейнер',
8bdd16b4 2394 },
2395 }, {
2396 # playlists, singlepage
2397 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2398 'playlist_mincount': 4,
2399 'info_dict': {
2400 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2401 'title': 'ThirstForScience - Playlists',
2402 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2403 'uploader': 'ThirstForScience',
2404 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2405 }
2406 }, {
2407 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2408 'only_matching': True,
2409 }, {
2410 # basic, single video playlist
0e30a7b9 2411 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2412 'info_dict': {
0e30a7b9 2413 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2414 'uploader': 'Sergey M.',
2415 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2416 'title': 'youtube-dl public playlist',
81127aa5 2417 },
0e30a7b9 2418 'playlist_count': 1,
9291475f 2419 }, {
8bdd16b4 2420 # empty playlist
0e30a7b9 2421 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2422 'info_dict': {
0e30a7b9 2423 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2424 'uploader': 'Sergey M.',
2425 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2426 'title': 'youtube-dl empty playlist',
9291475f
PH
2427 },
2428 'playlist_count': 0,
2429 }, {
8bdd16b4 2430 # Home tab
2431 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2432 'info_dict': {
8bdd16b4 2433 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2434 'title': 'lex will - Home',
2435 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2436 'uploader': 'lex will',
2437 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2438 },
8bdd16b4 2439 'playlist_mincount': 2,
9291475f 2440 }, {
8bdd16b4 2441 # Videos tab
2442 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2443 'info_dict': {
8bdd16b4 2444 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2445 'title': 'lex will - Videos',
2446 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2447 'uploader': 'lex will',
2448 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2449 },
8bdd16b4 2450 'playlist_mincount': 975,
9291475f 2451 }, {
8bdd16b4 2452 # Videos tab, sorted by popular
2453 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2454 'info_dict': {
8bdd16b4 2455 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2456 'title': 'lex will - Videos',
2457 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2458 'uploader': 'lex will',
2459 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2460 },
8bdd16b4 2461 'playlist_mincount': 199,
9291475f 2462 }, {
8bdd16b4 2463 # Playlists tab
2464 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2465 'info_dict': {
8bdd16b4 2466 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2467 'title': 'lex will - Playlists',
2468 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2469 'uploader': 'lex will',
2470 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2471 },
8bdd16b4 2472 'playlist_mincount': 17,
ac7553d0 2473 }, {
8bdd16b4 2474 # Community tab
2475 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2476 'info_dict': {
8bdd16b4 2477 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2478 'title': 'lex will - Community',
2479 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2480 'uploader': 'lex will',
2481 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2482 },
2483 'playlist_mincount': 18,
87dadd45 2484 }, {
8bdd16b4 2485 # Channels tab
2486 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2487 'info_dict': {
8bdd16b4 2488 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2489 'title': 'lex will - Channels',
2490 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2491 'uploader': 'lex will',
2492 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2493 },
deaec5af 2494 'playlist_mincount': 12,
6b08cdf6 2495 }, {
a0566bbf 2496 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2497 'only_matching': True,
2498 }, {
a0566bbf 2499 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2500 'only_matching': True,
2501 }, {
a0566bbf 2502 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2503 'only_matching': True,
2504 }, {
2505 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2506 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2507 'info_dict': {
2508 'title': '29C3: Not my department',
2509 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2510 'uploader': 'Christiaan008',
2511 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2512 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2513 },
2514 'playlist_count': 96,
2515 }, {
2516 'note': 'Large playlist',
2517 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2518 'info_dict': {
8bdd16b4 2519 'title': 'Uploads from Cauchemar',
2520 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2521 'uploader': 'Cauchemar',
2522 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2523 },
8bdd16b4 2524 'playlist_mincount': 1123,
2525 }, {
2526 # even larger playlist, 8832 videos
2527 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2528 'only_matching': True,
4b7df0d3
JMF
2529 }, {
2530 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2531 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2532 'info_dict': {
acf757f4
PH
2533 'title': 'Uploads from Interstellar Movie',
2534 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2535 'uploader': 'Interstellar Movie',
8bdd16b4 2536 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2537 },
481cc733 2538 'playlist_mincount': 21,
8bdd16b4 2539 }, {
2540 # https://github.com/ytdl-org/youtube-dl/issues/21844
2541 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2542 'info_dict': {
2543 'title': 'Data Analysis with Dr Mike Pound',
2544 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2545 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2546 'uploader': 'Computerphile',
deaec5af 2547 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2548 },
2549 'playlist_mincount': 11,
2550 }, {
a0566bbf 2551 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2552 'only_matching': True,
dacb3a86
S
2553 }, {
2554 # Playlist URL that does not actually serve a playlist
2555 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2556 'info_dict': {
2557 'id': 'FqZTN594JQw',
2558 'ext': 'webm',
2559 'title': "Smiley's People 01 detective, Adventure Series, Action",
2560 'uploader': 'STREEM',
2561 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2563 'upload_date': '20150526',
2564 'license': 'Standard YouTube License',
2565 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2566 'categories': ['People & Blogs'],
2567 'tags': list,
dbdaaa23 2568 'view_count': int,
dacb3a86
S
2569 'like_count': int,
2570 'dislike_count': int,
2571 },
2572 'params': {
2573 'skip_download': True,
2574 },
13a75688 2575 'skip': 'This video is not available.',
dacb3a86 2576 'add_ie': [YoutubeIE.ie_key()],
481cc733 2577 }, {
8bdd16b4 2578 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2579 'only_matching': True,
66b48727 2580 }, {
8bdd16b4 2581 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2582 'only_matching': True,
a0566bbf 2583 }, {
2584 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2585 'info_dict': {
2586 'id': '9Auq9mYxFEE',
2587 'ext': 'mp4',
deaec5af 2588 'title': compat_str,
a0566bbf 2589 'uploader': 'Sky News',
2590 'uploader_id': 'skynews',
2591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2592 'upload_date': '20191102',
deaec5af 2593 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2594 'categories': ['News & Politics'],
2595 'tags': list,
2596 'like_count': int,
2597 'dislike_count': int,
2598 },
2599 'params': {
2600 'skip_download': True,
2601 },
2602 }, {
2603 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2604 'info_dict': {
2605 'id': 'a48o2S1cPoo',
2606 'ext': 'mp4',
2607 'title': 'The Young Turks - Live Main Show',
2608 'uploader': 'The Young Turks',
2609 'uploader_id': 'TheYoungTurks',
2610 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2611 'upload_date': '20150715',
2612 'license': 'Standard YouTube License',
2613 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2614 'categories': ['News & Politics'],
2615 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2616 'like_count': int,
2617 'dislike_count': int,
2618 },
2619 'params': {
2620 'skip_download': True,
2621 },
2622 'only_matching': True,
2623 }, {
2624 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2625 'only_matching': True,
2626 }, {
2627 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2628 'only_matching': True,
3d3dddc9 2629 }, {
2630 'url': 'https://www.youtube.com/feed/trending',
2631 'only_matching': True,
2632 }, {
2633 # needs auth
2634 'url': 'https://www.youtube.com/feed/library',
2635 'only_matching': True,
2636 }, {
2637 # needs auth
2638 'url': 'https://www.youtube.com/feed/history',
2639 'only_matching': True,
2640 }, {
2641 # needs auth
2642 'url': 'https://www.youtube.com/feed/subscriptions',
2643 'only_matching': True,
2644 }, {
2645 # needs auth
2646 'url': 'https://www.youtube.com/feed/watch_later',
2647 'only_matching': True,
2648 }, {
2649 # no longer available?
2650 'url': 'https://www.youtube.com/feed/recommended',
2651 'only_matching': True,
29f7c58a 2652 }, {
2653 # inline playlist with not always working continuations
2654 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2655 'only_matching': True,
2656 }, {
2657 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2658 'only_matching': True,
2659 }, {
2660 'url': 'https://www.youtube.com/course',
2661 'only_matching': True,
2662 }, {
2663 'url': 'https://www.youtube.com/zsecurity',
2664 'only_matching': True,
2665 }, {
2666 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2667 'only_matching': True,
2668 }, {
2669 'url': 'https://www.youtube.com/TheYoungTurks/live',
2670 'only_matching': True,
2671 }]
2672
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubeIE are always left to YoutubeIE; everything
    else is decided by the inherited check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2677
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The ``channelId`` meta tag is preferred. When it is absent, the id is
    parsed out of the channel URL taken from one of the canonical-URL meta
    tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: a repeated group only
    # keeps its last iteration, i.e. the final character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2690
8bdd16b4 2691 @staticmethod
cd7c66cf 2692 def _extract_basic_item_renderer(item):
2693 # Modified from _extract_grid_item_renderer
2694 known_renderers = (
e3c07697 2695 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2696 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2697 )
2698 for key, renderer in item.items():
2699 if key not in known_renderers:
2700 continue
2701 return renderer
8bdd16b4 2702
def _grid_entries(self, grid_renderer):
    """Yield results for the playlists, videos and channels of a grid.

    Each element of ``grid_renderer['items']`` is reduced to its basic item
    renderer; a result is yielded for every one of ``playlistId``,
    ``videoId`` and ``channelId`` that the renderer carries.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue

        # playlist
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            playlist_title = try_get(
                item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=playlist_title)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel
        channel_id = item_renderer.get('channelId')
        if channel_id:
            channel_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
2731
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` contents are
    handled; ``horizontalListRenderer`` is not supported yet.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = (
        shelf_content.get('gridRenderer')
        or shelf_content.get('expandedShelfContentsRenderer'))
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for entry in self._grid_entries(grid):
            yield entry
    # TODO: handle shelf_content['horizontalListRenderer']
8bdd16b4 2747
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf links to its own page, a URL result for that page is
    yielded first. With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` yields nothing at all. The entries embedded in the
    shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2765
def _playlist_entries(self, video_list_renderer):
    """Yield a video entry for each playlist item that has a ``videoId``.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are
    accepted.
    """
    for item in video_list_renderer['contents']:
        if not isinstance(item, dict):
            continue
        video_renderer = (
            item.get('playlistVideoRenderer')
            or item.get('playlistPanelVideoRenderer'))
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
07aeced6 2777
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content.videoRenderer``, if any."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2785
def _video_entry(self, video_renderer):
    """Return the video extracted from *video_renderer*, or None without a ``videoId``."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 2790
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The post's video attachment (if any) comes first, followed by every
    link in the post text that YoutubeIE can handle, except a link to the
    attachment itself.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            # Remember the attachment so that a text link pointing at the
            # same video is not yielded a second time
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2819
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation page.

    Items of ``post_thread_continuation['contents']`` that are not dicts or
    carry no ``backstagePostThreadRenderer`` dict are ignored.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Skip malformed items instead of failing on ``.get``
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 2830
29f7c58a 2831 @staticmethod
2832 def _build_continuation_query(continuation, ctp=None):
2833 query = {
2834 'ctoken': continuation,
2835 'continuation': continuation,
2836 }
2837 if ctp:
2838 query['itct'] = ctp
2839 return query
2840
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that data or its ``continuation`` token is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2852
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first. Otherwise the first
    ``continuationItemRenderer`` with a continuation token found among the
    renderer's ``contents`` and ``items`` is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2875
def _entries(self, tab, item_id, identity_token, account_syncid):
    """Yield the entries of *tab*, following continuations page by page.

    tab            -- the selected tab renderer dict
    item_id        -- id used in the per-page progress notes
    identity_token -- sent as ``x-youtube-identity-token`` when truthy
    account_syncid -- sent as ``X-Goog-PageId`` when truthy

    Each continuation page is requested from the ``browse`` API endpoint
    and retried up to the ``extractor_retries`` param (default 3) on HTTP
    500/503/404 errors and on responses lacking continuation data.
    """

    # Filled by extract_entries with the continuation it discovers
    continuation_list = [None]  # Python 2 does not support nonlocal

    isr_renderer_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                for key, renderer in isr_content.items():
                    if key not in isr_renderer_handlers:
                        continue
                    for entry in isr_renderer_handlers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0

    # Handlers for the ``continuationContents`` response layout
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for the ``appendContinuationItemsAction`` response layout:
    # (entry generator, key under which the item list is handed over)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break

        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation came with click
        # tracking params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}

        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        response = None  # stays None if the retry loop never runs
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep="browse", fatal=True, headers=headers,
                    video_id='%s page %s' % (item_id, page_num),
                    query=query,
                    note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                raise
            else:
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if dict_get(response,
                            ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
                    break

                # Youtube may send alerts if there was an issue with the continuation page
                self._extract_alerts(response, expected=False)

                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        continuation_items = try_get(
            response,
            lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the items so they look like the renderer the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list[0] = None
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3030
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab marked as selected.

    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3038
@staticmethod
def _extract_uploader(data):
    """Return uploader fields read from the playlist sidebar of *data*.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields whose value is None are left out.
    """
    fields = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        base_url = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        fields['uploader'] = owner.get('text')
        fields['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        fields['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict(
        (name, value) for name, value in fields.items() if value is not None)
8bdd16b4 3061
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Return the playlist result for a page made of tabs.

    Metadata is read from ``channelMetadataRenderer`` or, failing that,
    ``playlistMetadataRenderer``. The entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    thumbnails_list = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    # Fall back to the id taken from the URL, and to the id as the title
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3131
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a mix playlist, page by page.

    After each page the watch page of the last yielded video is fetched to
    get the next part of the mix. Extraction stops when a page has no
    playlist or no new videos, or when the very first video shows up again.
    """
    first_id = last_id = None
    for page_num in itertools.count(1):
        # A follow-up watch page may come without the playlist panel
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']

        _, data = self._extract_webpage(
            'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
            '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3154
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return the result for a playlist shown next to a watch page.

    When the playlist has its own URL that differs from *url*, extraction
    is delegated to that URL. Otherwise the playlist is extracted as a mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3172
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in *data*.

    Alerts whose type is not ``error`` (case-insensitive) are reported as
    warnings. When error alerts are present, the last error message is
    raised as an ExtractorError (with the given *expected* flag) and the
    earlier ones are reported as warnings.
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed alerts instead of failing on ``.get``
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                # The type is lower-cased below, so it has to be a string
                if not alert_type or not isinstance(alert_type, compat_str):
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if message:
                    yield alert_type, message
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    message = try_get(run, lambda x: x['text'], compat_str)
                    if message:
                        yield alert_type, message

    err_msg = None
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            if err_msg:
                # Only the last error is raised; earlier ones become warnings
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
02ced43c 3202
cd7c66cf 3203 def _extract_webpage(self, url, item_id):
62bff2c1 3204 retries = self._downloader.params.get('extractor_retries', 3)
3205 count = -1
c705177d 3206 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3207 while count < retries:
62bff2c1 3208 count += 1
14fdfea9 3209 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3210 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3211 if count:
c705177d 3212 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3213 webpage = self._download_webpage(
3214 url, item_id,
cd7c66cf 3215 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3216 data = self._extract_yt_initial_data(item_id, webpage)
f3eaa8dd 3217 self._extract_alerts(data, expected=True)
14fdfea9 3218 if data.get('contents') or data.get('currentVideoEndpoint'):
3219 break
c705177d 3220 if count >= retries:
3221 self._downloader.report_error(last_error)
cd7c66cf 3222 return webpage, data
3223
def _real_extract(self, url):
    """Extract a tab, playlist or single video from url.

    The host is normalised to www.youtube.com first. The page data is then
    dispatched, in order, to the tab extractor, the watch-page playlist
    extractor, or handed to YoutubeIE as a single video.

    Raises ExtractorError when nothing on the page can be recognized, or
    when a watch URL carries neither a video id nor a playlist id.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    tab_pattern = r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL
    match = re.match(tab_pattern, url)
    mobj = match.groupdict() if match else {}
    if mobj and not mobj.get('not_channel'):
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (mobj.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning(
            'A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(
            'Downloading playlist %s; add --no-playlist to just download video %s'
            % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # Channel/playlist pages expose their content as tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # Watch pages expose the playlist as a side panel
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fall back to the single video, preferring the id reported by the page
    page_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'], compat_str)
    video_id = page_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning(
        'Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3277
c5e8d7af 3278
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and URLs carrying a list= parameter.

    Anything YoutubeTabIE already accepts is left to it; everything else is
    rewritten to a /playlist URL and delegated to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that YoutubeTabIE is able to handle itself."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE through a canonical /playlist URL."""
        playlist_id = self._match_id(url)
        # Keep the original query if there is one; a bare id has none
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        query = query or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3353
3354
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a list= parameter.

    The link is expanded to a full watch URL with both ids and delegated to
    YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a watch URL for YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3393
3394
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" keyword to the matching /user/ URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3408
b05654f0 3409
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" keyword family to the "LL" playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed liked-videos playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3427
3428
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the "ytsearch" keyword.

    Subclasses may set _SEARCH_PARAMS to send an extra 'params' value with
    every search request.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n extracted videos for query, paging as needed.

        Paging stops when a request returns nothing, a page has no result
        sections, or no continuation token is found on a page.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=request)
            if not response:
                return
            # The first page and continuation pages use different layouts
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or []:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    found += 1
                    if found == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3497
c9ae7b95 3498
class YoutubeSearchDateIE(YoutubeSearchIE):
    """YoutubeSearchIE variant behind the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' value of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3504
c9ae7b95 3505
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle /results search URLs by re-running the search they encode."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search named by search_query/q, passing sp as params."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The URL's "sp" value replaces the search params on this instance
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3531
3532
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide _FEED_NAME; it is used both for IE_NAME
    and for the /feed/ URL that extraction is delegated to.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        # Log in during initialization
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3552
3553
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the "WL" playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed watch-later playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3566
3567
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytrec" keywords and the bare home page URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3582
1ed5b5c9 3583
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytsubs" keyword family."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3595
1ed5b5c9 3596
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ythis" / ":ythistory" keywords."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3605
3606
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video id.

    Extraction always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3654
3655
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose v= value is only 1 to 10 characters long.

    Extraction always fails, reporting the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)