]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
[FormatSort] fix bug where `quality` had more priority than `hasvid`
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
4bb4a188 36 get_element_by_id,
dd27fd17 37 int_or_none,
94278f72 38 mimetype2ext,
6310acf5 39 parse_codecs,
b84071c0 40 parse_count,
7c80519c 41 parse_duration,
0cb58b02 42 remove_quotes,
3995d37d 43 remove_start,
cf7e015f 44 smuggle_url,
dbdaaa23 45 str_or_none,
c93d53f5 46 str_to_int,
556dbe7f 47 try_get,
c5e8d7af
PH
48 unescapeHTML,
49 unified_strdate,
cf7e015f 50 unsmuggle_url,
8bdd16b4 51 update_url_query,
81c2f20b 52 uppercase_escape,
21c340b8 53 url_or_none,
6e6bc8da 54 urlencode_postdata,
8bdd16b4 55 urljoin,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors.

    Bundles what the concrete YouTube extractors share: the Google account
    login flow, the ``PREF`` cookie set-up, helpers for pulling the JSON blobs
    embedded in a YouTube page and a thin wrapper around the ``youtubei/v1``
    API.
    """
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # NOTE(review): not referenced by any method of this class.
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used, in this order, by _login().
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # ``{0}`` is filled with the "TL" value taken from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names; presumably the first path components
    # that must not be mistaken for a channel/user name -- TODO confirm
    # against the extractors that use it.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment matching a playlist id: a known prefix followed by at
    # least 10 id characters, or one of the fixed ids RDMM / WL / LL / LM.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the ``PREF`` cookie on ``.youtube.com``.

        The cookie value carries ``hl=en`` (presumably forcing English pages,
        as the method name suggests) and expires about two months from now.
        """
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one ``url_result`` for the 'Youtube' extractor per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        The flow is: fetch the login page (for its hidden form inputs), look
        the account up, answer the password challenge, optionally submit a
        two-factor code, and finally fetch the "CheckCookie" URL returned by
        Google to confirm that the session is established.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (was a bare ``return``) to honour the documented
            # contract; callers that only test truthiness are unaffected.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST ``f_req`` (JSON-encoded) plus the login form to ``url``.

            Returns the decoded JSON, or False when the request fails
            (``fatal=False``).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the rest parses as
                # JSON.  The '[' inside the set is escaped: an unescaped
                # leading '[' raises FutureWarning ("possible nested set") on
                # Python 3.7+, while matching exactly the same text.
                transform_source=lambda s: re.sub(r'^[^\[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (was a bare ``return``), see the docstring.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: ``%`` binds tighter than
            # ``if ... else``, so without the parentheses any message other
            # than INCORRECT_ANSWER_ENTERED was reported without the prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading "G-"; only the part
                # after it is submitted.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as the
                    # password warning above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge type cannot be solved here; tell the
                # user to solve it in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login counts as successful only if the response mentions the
        # Google account page.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with a private ``query`` dict.

        ``kwargs['query']`` is replaced by a copy (an empty dict when absent)
        so the parent receives its own dict object rather than the caller's.
        """
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.  A failed login is not
        an error here: initialization simply ends.
        """
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Request body merged into every _call_api() payload.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Patterns locating the JSON objects embedded in a page's scripts; the
    # boundary pattern is appended by _extract_yt_initial_data() to anchor
    # the end of the (non-greedy) object match.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """Send a JSON request to ``https://www.youtube.com/youtubei/v1/<ep>``.

        ``query`` is merged over a shallow copy of ``_DEFAULT_API_DATA``
        (top-level keys of ``query`` win) and sent JSON-encoded as the request
        body.  Returns whatever ``_download_json`` returns.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Parse the ``ytInitialData`` JSON object found in ``webpage``.

        The pattern followed by the boundary pattern is tried first, then the
        bare pattern as a fallback.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

        When the call is not found, the default ``'{}'`` is parsed instead;
        JSON parsing is non-fatal (``fatal=False``).
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a ``url_transparent`` result from a video renderer dict.

        Reads ``videoId``, ``title``, ``descriptionSnippet``, ``lengthText``,
        ``viewCountText`` and ``ownerText`` from ``renderer``; every field but
        the id is looked up with ``try_get`` and may end up as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first, then only a leading run of digits and
        # commas is taken as the count.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
356
0c148415 357
360e1ca5 358class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 359 IE_DESC = 'YouTube.com'
cb7dfeea 360 _VALID_URL = r"""(?x)^
c5e8d7af 361 (
edb53e2d 362 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 363 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 364 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 365 (?:www\.)?pwnyoutube\.com/|
8b561bfc 366 (?:www\.)?hooktube\.com/|
f7000f3a 367 (?:www\.)?yourepeat\.com/|
e69ae5b9 368 tube\.majestyc\.net/|
ba036333 369 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 370 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 371 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 372 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 373 (?:www\.)?invidious\.kabi\.tk/|
ba036333 374 (?:www\.)?invidious\.13ad\.de/|
791d2e81 375 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 376 (?:www\.)?invidious\.zapashcanon\.fr/|
377 (?:www\.)?invidious\.kavin\.rocks/|
378 (?:www\.)?invidious\.tube/|
379 (?:www\.)?invidiou\.site/|
380 (?:www\.)?invidious\.site/|
381 (?:www\.)?invidious\.xyz/|
494d664e 382 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 383 (?:www\.)?invidious\.drycat\.fr/|
ba036333 384 (?:www\.)?tube\.poal\.co/|
29f7c58a 385 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 386 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 387 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 388 (?:www\.)?yewtu\.be/|
494d664e 389 (?:www\.)?yt\.elukerio\.org/|
894b3826 390 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 391 (?:www\.)?invidious\.ggc-project\.de/|
392 (?:www\.)?yt\.maisputain\.ovh/|
393 (?:www\.)?invidious\.13ad\.de/|
394 (?:www\.)?invidious\.toot\.koeln/|
395 (?:www\.)?invidious\.fdn\.fr/|
396 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 397 (?:www\.)?kgg2m7yk5aybusll\.onion/|
398 (?:www\.)?qklhadlycap4cnod\.onion/|
399 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
400 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
401 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
402 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 403 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 404 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 405 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
406 (?:.*?\#/)? # handle anchor (#/) redirect urls
407 (?: # the various things that can precede the ID:
ac7553d0 408 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 409 |(?: # or the v= param in all its forms
f7000f3a 410 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 411 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 412 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
413 v=
414 )
f4b05232 415 ))
cbaed4bb
S
416 |(?:
417 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
418 vid\.plus| # or vid.plus/xxxx
419 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 420 )/
edb53e2d 421 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 422 )
c5e8d7af 423 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 424 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
425 (?!.*?\blist=
426 (?:
427 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
428 WL # WL are handled by the watch later IE
429 )
430 )
c5e8d7af 431 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 432 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 433 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
434 _PLAYER_INFO_RE = (
435 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
436 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
437 )
2c62dc26 438 _formats = {
c2d3cb4c 439 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
440 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
441 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
442 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
443 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
444 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
445 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
446 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 447 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 448 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
449 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
450 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
451 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
452 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
453 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 454 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 455 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 457
458
459 # 3D videos
c2d3cb4c 460 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
461 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
462 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
463 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 464 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
465 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
466 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 467
96fb5605 468 # Apple HTTP Live Streaming
11f12195 469 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 470 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
471 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
472 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
473 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
474 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 475 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
476 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
477
478 # DASH mp4 video
d23028a8
S
479 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 484 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
485 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
488 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
489 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
490 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 491
f6f1fc92 492 # Dash mp4 audio
d23028a8
S
493 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
494 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
495 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
496 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
497 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
498 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
499 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
500
501 # Dash webm
d23028a8
S
502 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
505 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
508 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
509 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 517 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
518 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
520 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
521 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
522 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
524
525 # Dash webm audio
d23028a8
S
526 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
527 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 528
0857baad 529 # Dash webm audio with opus inside
d23028a8
S
530 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
531 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
532 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 533
ce6b9a2d
PH
534 # RTMP (unnamed)
535 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
536
537 # av01 video only formats sometimes served with "unknown" codecs
538 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
539 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
540 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
541 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 542 }
29f7c58a 543 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 544
fd5c4aab
S
545 _GEO_BYPASS = False
546
78caa52a 547 IE_NAME = 'youtube'
2eb88d95
PH
548 _TESTS = [
549 {
2d3d2997 550 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
551 'info_dict': {
552 'id': 'BaW_jenozKc',
553 'ext': 'mp4',
3867038a 554 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
555 'uploader': 'Philipp Hagemeister',
556 'uploader_id': 'phihag',
ec85ded8 557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
558 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
559 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 560 'upload_date': '20121002',
3867038a 561 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 562 'categories': ['Science & Technology'],
3867038a 563 'tags': ['youtube-dl'],
556dbe7f 564 'duration': 10,
dbdaaa23 565 'view_count': int,
3e7c1224
PH
566 'like_count': int,
567 'dislike_count': int,
7c80519c 568 'start_time': 1,
297a564b 569 'end_time': 9,
2eb88d95 570 }
0e853ca4 571 },
fccd3771 572 {
4bc3a23e
PH
573 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
574 'note': 'Embed-only video (#1746)',
575 'info_dict': {
576 'id': 'yZIXLfi8CZQ',
577 'ext': 'mp4',
578 'upload_date': '20120608',
579 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
580 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
581 'uploader': 'SET India',
94bfcd23 582 'uploader_id': 'setindia',
ec85ded8 583 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 584 'age_limit': 18,
fccd3771
PH
585 }
586 },
11b56058 587 {
8bdd16b4 588 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
589 'note': 'Use the first video ID in the URL',
590 'info_dict': {
591 'id': 'BaW_jenozKc',
592 'ext': 'mp4',
3867038a 593 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
594 'uploader': 'Philipp Hagemeister',
595 'uploader_id': 'phihag',
ec85ded8 596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 597 'upload_date': '20121002',
3867038a 598 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 599 'categories': ['Science & Technology'],
3867038a 600 'tags': ['youtube-dl'],
556dbe7f 601 'duration': 10,
dbdaaa23 602 'view_count': int,
11b56058
PM
603 'like_count': int,
604 'dislike_count': int,
34a7de29
S
605 },
606 'params': {
607 'skip_download': True,
608 },
11b56058 609 },
dd27fd17 610 {
2d3d2997 611 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
612 'note': '256k DASH audio (format 141) via DASH manifest',
613 'info_dict': {
614 'id': 'a9LDPn-MO4I',
615 'ext': 'm4a',
616 'upload_date': '20121002',
617 'uploader_id': '8KVIDEO',
ec85ded8 618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
619 'description': '',
620 'uploader': '8KVIDEO',
621 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 622 },
4bc3a23e
PH
623 'params': {
624 'youtube_include_dash_manifest': True,
625 'format': '141',
4919603f 626 },
de3c7fe0 627 'skip': 'format 141 not served anymore',
dd27fd17 628 },
8bdd16b4 629 # DASH manifest with encrypted signature
630 {
631 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
632 'info_dict': {
633 'id': 'IB3lcPjvWLA',
634 'ext': 'm4a',
635 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
636 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
637 'duration': 244,
638 'uploader': 'AfrojackVEVO',
639 'uploader_id': 'AfrojackVEVO',
640 'upload_date': '20131011',
641 },
642 'params': {
643 'youtube_include_dash_manifest': True,
644 'format': '141/bestaudio[ext=m4a]',
645 },
646 },
aa79ac0c
PH
647 # Controversy video
648 {
649 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
650 'info_dict': {
651 'id': 'T4XJQO3qol8',
652 'ext': 'mp4',
556dbe7f 653 'duration': 219,
aa79ac0c 654 'upload_date': '20100909',
4fe54c12 655 'uploader': 'Amazing Atheist',
aa79ac0c 656 'uploader_id': 'TheAmazingAtheist',
ec85ded8 657 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
658 'title': 'Burning Everyone\'s Koran',
659 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
660 }
c522adb1 661 },
dd2d55f1 662 # Normal age-gate video (embed allowed)
c522adb1 663 {
2d3d2997 664 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
665 'info_dict': {
666 'id': 'HtVdAasjOgU',
667 'ext': 'mp4',
668 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 669 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 670 'duration': 142,
c522adb1
JMF
671 'uploader': 'The Witcher',
672 'uploader_id': 'WitcherGame',
ec85ded8 673 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 674 'upload_date': '20140605',
34952f09 675 'age_limit': 18,
c522adb1
JMF
676 },
677 },
8bdd16b4 678 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
679 # YouTube Red ad is not captured for creator
680 {
681 'url': '__2ABJjxzNo',
682 'info_dict': {
683 'id': '__2ABJjxzNo',
684 'ext': 'mp4',
685 'duration': 266,
686 'upload_date': '20100430',
687 'uploader_id': 'deadmau5',
688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
689 'creator': 'Dada Life, deadmau5',
690 'description': 'md5:12c56784b8032162bb936a5f76d55360',
691 'uploader': 'deadmau5',
692 'title': 'Deadmau5 - Some Chords (HD)',
693 'alt_title': 'This Machine Kills Some Chords',
694 },
695 'expected_warnings': [
696 'DASH manifest missing',
697 ]
698 },
067aa17e 699 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
700 {
701 'url': 'lqQg6PlCWgI',
702 'info_dict': {
703 'id': 'lqQg6PlCWgI',
704 'ext': 'mp4',
556dbe7f 705 'duration': 6085,
90227264 706 'upload_date': '20150827',
cbe2bd91 707 'uploader_id': 'olympic',
ec85ded8 708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 709 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 710 'uploader': 'Olympic',
cbe2bd91
PH
711 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
712 },
713 'params': {
714 'skip_download': 'requires avconv',
e52a40ab 715 }
cbe2bd91 716 },
6271f1ca
PH
717 # Non-square pixels
718 {
719 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
720 'info_dict': {
721 'id': '_b-2C3KPAM0',
722 'ext': 'mp4',
723 'stretched_ratio': 16 / 9.,
556dbe7f 724 'duration': 85,
6271f1ca
PH
725 'upload_date': '20110310',
726 'uploader_id': 'AllenMeow',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 728 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 729 'uploader': '孫ᄋᄅ',
6271f1ca
PH
730 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
731 },
06b491eb
S
732 },
733 # url_encoded_fmt_stream_map is empty string
734 {
735 'url': 'qEJwOuvDf7I',
736 'info_dict': {
737 'id': 'qEJwOuvDf7I',
f57b7835 738 'ext': 'webm',
06b491eb
S
739 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
740 'description': '',
741 'upload_date': '20150404',
742 'uploader_id': 'spbelect',
743 'uploader': 'Наблюдатели Петербурга',
744 },
745 'params': {
746 'skip_download': 'requires avconv',
e323cf3f
S
747 },
748 'skip': 'This live event has ended.',
06b491eb 749 },
067aa17e 750 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
751 {
752 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
753 'info_dict': {
754 'id': 'FIl7x6_3R5Y',
eb6793ba 755 'ext': 'webm',
da77d856
S
756 'title': 'md5:7b81415841e02ecd4313668cde88737a',
757 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 758 'duration': 220,
da77d856
S
759 'upload_date': '20150625',
760 'uploader_id': 'dorappi2000',
ec85ded8 761 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 762 'uploader': 'dorappi2000',
eb6793ba 763 'formats': 'mincount:31',
da77d856 764 },
eb6793ba 765 'skip': 'not actual anymore',
2ee8f5d8 766 },
8a1a26ce
YCH
767 # DASH manifest with segment_list
768 {
769 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
770 'md5': '8ce563a1d667b599d21064e982ab9e31',
771 'info_dict': {
772 'id': 'CsmdDsKjzN8',
773 'ext': 'mp4',
17ee98e1 774 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
775 'uploader': 'Airtek',
776 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
777 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
778 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
779 },
780 'params': {
781 'youtube_include_dash_manifest': True,
782 'format': '135', # bestvideo
be49068d
S
783 },
784 'skip': 'This live event has ended.',
2ee8f5d8 785 },
cf7e015f
S
786 {
787 # Multifeed videos (multiple cameras), URL is for Main Camera
788 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
789 'info_dict': {
790 'id': 'jqWvoWXjCVs',
791 'title': 'teamPGP: Rocket League Noob Stream',
792 'description': 'md5:dc7872fb300e143831327f1bae3af010',
793 },
794 'playlist': [{
795 'info_dict': {
796 'id': 'jqWvoWXjCVs',
797 'ext': 'mp4',
798 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
799 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 800 'duration': 7335,
cf7e015f
S
801 'upload_date': '20150721',
802 'uploader': 'Beer Games Beer',
803 'uploader_id': 'beergamesbeer',
ec85ded8 804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 805 'license': 'Standard YouTube License',
cf7e015f
S
806 },
807 }, {
808 'info_dict': {
809 'id': '6h8e8xoXJzg',
810 'ext': 'mp4',
811 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
812 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 813 'duration': 7337,
cf7e015f
S
814 'upload_date': '20150721',
815 'uploader': 'Beer Games Beer',
816 'uploader_id': 'beergamesbeer',
ec85ded8 817 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 818 'license': 'Standard YouTube License',
cf7e015f
S
819 },
820 }, {
821 'info_dict': {
822 'id': 'PUOgX5z9xZw',
823 'ext': 'mp4',
824 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
825 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 826 'duration': 7337,
cf7e015f
S
827 'upload_date': '20150721',
828 'uploader': 'Beer Games Beer',
829 'uploader_id': 'beergamesbeer',
ec85ded8 830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 831 'license': 'Standard YouTube License',
cf7e015f
S
832 },
833 }, {
834 'info_dict': {
835 'id': 'teuwxikvS5k',
836 'ext': 'mp4',
837 'title': 'teamPGP: Rocket League Noob Stream (zim)',
838 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 839 'duration': 7334,
cf7e015f
S
840 'upload_date': '20150721',
841 'uploader': 'Beer Games Beer',
842 'uploader_id': 'beergamesbeer',
ec85ded8 843 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 844 'license': 'Standard YouTube License',
cf7e015f
S
845 },
846 }],
847 'params': {
848 'skip_download': True,
849 },
4fe54c12 850 'skip': 'This video is not available.',
cbaed4bb 851 },
f9f49d87 852 {
067aa17e 853 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
854 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
855 'info_dict': {
856 'id': 'gVfLd0zydlo',
857 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
858 },
859 'playlist_count': 2,
be49068d 860 'skip': 'Not multifeed anymore',
f9f49d87 861 },
cbaed4bb 862 {
2d3d2997 863 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 864 'only_matching': True,
0e49d9a6 865 },
6d4fc66b 866 {
2d3d2997 867 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
868 'only_matching': True,
869 },
0e49d9a6 870 {
067aa17e 871 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 872 # Also tests cut-off URL expansion in video description (see
067aa17e
S
873 # https://github.com/ytdl-org/youtube-dl/issues/1892,
874 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
875 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
876 'info_dict': {
877 'id': 'lsguqyKfVQg',
878 'ext': 'mp4',
879 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 880 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 881 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 882 'duration': 133,
0e49d9a6
LL
883 'upload_date': '20151119',
884 'uploader_id': 'IronSoulElf',
ec85ded8 885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 886 'uploader': 'IronSoulElf',
eb6793ba
S
887 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
888 'track': 'Dark Walk - Position Music',
889 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 890 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
891 },
892 'params': {
893 'skip_download': True,
894 },
895 },
61f92af1 896 {
067aa17e 897 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
898 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
899 'only_matching': True,
900 },
313dfc45
LL
901 {
902 # Video with yt:stretch=17:0
903 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
904 'info_dict': {
905 'id': 'Q39EVAstoRM',
906 'ext': 'mp4',
907 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
908 'description': 'md5:ee18a25c350637c8faff806845bddee9',
909 'upload_date': '20151107',
910 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
911 'uploader': 'CH GAMER DROID',
912 },
913 'params': {
914 'skip_download': True,
915 },
be49068d 916 'skip': 'This video does not exist.',
313dfc45 917 },
7caf9830
S
918 {
919 # Video licensed under Creative Commons
920 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
921 'info_dict': {
922 'id': 'M4gD1WSo5mA',
923 'ext': 'mp4',
924 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
925 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 926 'duration': 721,
7caf9830
S
927 'upload_date': '20150127',
928 'uploader_id': 'BerkmanCenter',
ec85ded8 929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 930 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
931 'license': 'Creative Commons Attribution license (reuse allowed)',
932 },
933 'params': {
934 'skip_download': True,
935 },
936 },
fd050249
S
937 {
938 # Channel-like uploader_url
939 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
940 'info_dict': {
941 'id': 'eQcmzGIKrzg',
942 'ext': 'mp4',
943 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
944 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 945 'duration': 4060,
fd050249 946 'upload_date': '20151119',
eb6793ba 947 'uploader': 'Bernie Sanders',
fd050249 948 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 949 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
950 'license': 'Creative Commons Attribution license (reuse allowed)',
951 },
952 'params': {
953 'skip_download': True,
954 },
955 },
040ac686
S
956 {
957 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
958 'only_matching': True,
7f29cf54
S
959 },
960 {
067aa17e 961 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
962 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
963 'only_matching': True,
6496ccb4
S
964 },
965 {
966 # Rental video preview
967 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
968 'info_dict': {
969 'id': 'uGpuVWrhIzE',
970 'ext': 'mp4',
971 'title': 'Piku - Trailer',
972 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
973 'upload_date': '20150811',
974 'uploader': 'FlixMatrix',
975 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
977 'license': 'Standard YouTube License',
978 },
979 'params': {
980 'skip_download': True,
981 },
eb6793ba 982 'skip': 'This video is not available.',
022a5d66 983 },
12afdc2a
S
984 {
985 # YouTube Red video with episode data
986 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
987 'info_dict': {
988 'id': 'iqKdEhx-dD4',
989 'ext': 'mp4',
990 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 991 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 992 'duration': 2085,
12afdc2a
S
993 'upload_date': '20170118',
994 'uploader': 'Vsauce',
995 'uploader_id': 'Vsauce',
996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
997 'series': 'Mind Field',
998 'season_number': 1,
999 'episode_number': 1,
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 'expected_warnings': [
1005 'Skipping DASH manifest',
1006 ],
1007 },
c7121fa7
S
1008 {
1009 # The following content has been identified by the YouTube community
1010 # as inappropriate or offensive to some audiences.
1011 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1012 'info_dict': {
1013 'id': '6SJNVb0GnPI',
1014 'ext': 'mp4',
1015 'title': 'Race Differences in Intelligence',
1016 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1017 'duration': 965,
1018 'upload_date': '20140124',
1019 'uploader': 'New Century Foundation',
1020 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1021 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1022 },
1023 'params': {
1024 'skip_download': True,
1025 },
1026 },
022a5d66
S
1027 {
1028 # itag 212
1029 'url': '1t24XAntNCY',
1030 'only_matching': True,
fd5c4aab
S
1031 },
1032 {
1033 # geo restricted to JP
1034 'url': 'sJL6WA-aGkQ',
1035 'only_matching': True,
1036 },
cd5a74a2
S
1037 {
1038 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1039 'only_matching': True,
1040 },
825cd268
RA
1041 {
1042 # DRM protected
1043 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1044 'only_matching': True,
4fe54c12
S
1045 },
1046 {
1047 # Video with unsupported adaptive stream type formats
1048 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1049 'info_dict': {
1050 'id': 'Z4Vy8R84T1U',
1051 'ext': 'mp4',
1052 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1053 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1054 'duration': 433,
1055 'upload_date': '20130923',
1056 'uploader': 'Amelia Putri Harwita',
1057 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1058 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1059 'formats': 'maxcount:10',
1060 },
1061 'params': {
1062 'skip_download': True,
1063 'youtube_include_dash_manifest': False,
1064 },
5429d6a9 1065 'skip': 'not actual anymore',
5caabd3c 1066 },
1067 {
822b9d9c 1068 # Youtube Music Auto-generated description
5caabd3c 1069 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1070 'info_dict': {
1071 'id': 'MgNrAu2pzNs',
1072 'ext': 'mp4',
1073 'title': 'Voyeur Girl',
1074 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1075 'upload_date': '20190312',
5429d6a9
S
1076 'uploader': 'Stephen - Topic',
1077 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1078 'artist': 'Stephen',
1079 'track': 'Voyeur Girl',
1080 'album': 'it\'s too much love to know my dear',
1081 'release_date': '20190313',
1082 'release_year': 2019,
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
66b48727
RA
1088 {
1089 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1090 'only_matching': True,
1091 },
011e75e6
S
1092 {
1093 # invalid -> valid video id redirection
1094 'url': 'DJztXj2GPfl',
1095 'info_dict': {
1096 'id': 'DJztXj2GPfk',
1097 'ext': 'mp4',
1098 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1099 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1100 'upload_date': '20090125',
1101 'uploader': 'Prochorowka',
1102 'uploader_id': 'Prochorowka',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1104 'artist': 'Panjabi MC',
1105 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1106 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1107 },
1108 'params': {
1109 'skip_download': True,
1110 },
ea74e00b
DP
1111 },
1112 {
1113 # empty description results in an empty string
1114 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1115 'info_dict': {
1116 'id': 'x41yOUIvK2k',
1117 'ext': 'mp4',
1118 'title': 'IMG 3456',
1119 'description': '',
1120 'upload_date': '20170613',
1121 'uploader_id': 'ElevageOrVert',
1122 'uploader': 'ElevageOrVert',
1123 },
1124 'params': {
1125 'skip_download': True,
1126 },
1127 },
a0566bbf 1128 {
29f7c58a 1129 # with '};' inside yt initial data (see [1])
1130 # see [2] for an example with '};' inside ytInitialPlayerResponse
1131 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1132 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1133 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1134 'info_dict': {
1135 'id': 'CHqg6qOn4no',
1136 'ext': 'mp4',
1137 'title': 'Part 77 Sort a list of simple types in c#',
1138 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1139 'upload_date': '20130831',
1140 'uploader_id': 'kudvenkat',
1141 'uploader': 'kudvenkat',
1142 },
1143 'params': {
1144 'skip_download': True,
1145 },
1146 },
29f7c58a 1147 {
1148 # another example of '};' in ytInitialData
1149 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1150 'only_matching': True,
1151 },
1152 {
1153 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1154 'only_matching': True,
1155 },
2eb88d95
PH
1156 ]
1157
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Populated by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
e0df6211 1161
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a status line saying the video info webpage is being fetched."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1165
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a status line saying video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1169
def report_unavailable_format(self, video_id, format):
    """Print a status line saying the requested format is not available."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1173
def report_rtmp_download(self):
    """Print a status line saying the download will go over RTMP."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1177
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the signature's dot-separated parts.

    For example a signature shaped like ``'abcd.ef'`` yields ``'4.2'``.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1181
e40c758c
S
1182 @classmethod
1183 def _extract_player_info(cls, player_url):
1184 for player_re in cls._PLAYER_INFO_RE:
1185 id_m = re.search(player_re, player_url)
1186 if id_m:
1187 break
1188 else:
c081b35c 1189 raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1190 return id_m.group('ext'), id_m.group('id')
1191
1192 def _extract_signature_function(self, video_id, player_url, example_sig):
1193 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1194
c4417ddb 1195 # Read from filesystem cache
60064c53
PH
1196 func_id = '%s_%s_%s' % (
1197 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1198 assert os.path.basename(func_id) == func_id
a0e07d31 1199
69ea8ca4 1200 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1201 if cache_spec is not None:
78caa52a 1202 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1203
6d1a55a5
PH
1204 download_note = (
1205 'Downloading player %s' % player_url
1206 if self._downloader.params.get('verbose') else
1207 'Downloading %s player %s' % (player_type, player_id)
1208 )
e0df6211
PH
1209 if player_type == 'js':
1210 code = self._download_webpage(
1211 player_url, video_id,
6d1a55a5 1212 note=download_note,
69ea8ca4 1213 errnote='Download of %s failed' % player_url)
83799698 1214 res = self._parse_sig_js(code)
c4417ddb 1215 elif player_type == 'swf':
e0df6211
PH
1216 urlh = self._request_webpage(
1217 player_url, video_id,
6d1a55a5 1218 note=download_note,
69ea8ca4 1219 errnote='Download of %s failed' % player_url)
e0df6211 1220 code = urlh.read()
83799698 1221 res = self._parse_sig_swf(code)
e0df6211
PH
1222 else:
1223 assert False, 'Invalid player type %r' % player_type
1224
785521bf
PH
1225 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1226 cache_res = res(test_string)
1227 cache_spec = [ord(c) for c in cache_res]
83799698 1228
69ea8ca4 1229 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1230 return res
1231
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is applied to a probe string whose i-th character has code point
    i, so the code points of the result give, for every output position,
    the input index it was taken from.  That index list is rendered as a
    sum of ``s[i]`` / slice expressions and written with to_screen().
    """
    def gen_sig_code(idxs):
        """Yield ``s[...]`` expressions that together reproduce idxs."""
        def _genslice(start, end, step):
            # Render an inclusive run start..end (step +1 or -1) as a slice.
            starts = '' if start == 0 else str(start)
            # A stop of end + step below zero cannot be written as an
            # index, so the stop is left open instead.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run of consecutive indices is open.
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Run continues.
                    continue
                # Run ended at prev; emit it and start afresh.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Two neighbouring indices open a new run.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): this listing collapses whitespace; confirm the
    # indentation inside the ' return %s\n' literal against the real file.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1270
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that deciphers a signature using the JS player code.

    The name of the signature function is located in jscode with the
    regexes below (tried in order), the function is extracted with
    JSInterpreter, and the returned callable invokes it with the signature
    as its single argument.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice; the second, byte-identical
        # copy could never match where the first had not, so it is dropped.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
1291
def _parse_sig_swf(self, file_contents):
    """Return a callable that deciphers a signature using the SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from file_contents with SWFInterpreter; the returned callable invokes
    it with the signature as its single argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
1298
83799698 1299 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1300 """Turn the encrypted s field into a working signature"""
6b37f0be 1301
c8bf86d5 1302 if player_url is None:
69ea8ca4 1303 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1304
69ea8ca4 1305 if player_url.startswith('//'):
78caa52a 1306 player_url = 'https:' + player_url
3c90cc8b
S
1307 elif not re.match(r'https?://', player_url):
1308 player_url = compat_urlparse.urljoin(
1309 'https://www.youtube.com', player_url)
c8bf86d5 1310 try:
62af3a0e 1311 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1312 if player_id not in self._player_cache:
1313 func = self._extract_signature_function(
60064c53 1314 video_id, player_url, s
c8bf86d5
PH
1315 )
1316 self._player_cache[player_id] = func
1317 func = self._player_cache[player_id]
1318 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1319 self._print_sig_code(func, s)
c8bf86d5
PH
1320 return func(s)
1321 except Exception as e:
1322 tb = traceback.format_exc()
1323 raise ExtractorError(
78caa52a 1324 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1325
f96f5dda 1326 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
de7f3446 1327 try:
60e47a26 1328 subs_doc = self._download_xml(
38c2e5b8 1329 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1330 video_id, note=False)
1331 except ExtractorError as err:
9b9c5355 1332 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1333 return {}
de7f3446
JMF
1334
1335 sub_lang_list = {}
60e47a26
JMF
1336 for track in subs_doc.findall('track'):
1337 lang = track.attrib['lang_code']
7e660ac1
LD
1338 if lang in sub_lang_list:
1339 continue
360e1ca5 1340 sub_formats = []
23d17e4b 1341 for ext in self._SUBTITLE_FORMATS:
15707c7e 1342 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1343 'lang': lang,
1344 'v': video_id,
1345 'fmt': ext,
1346 'name': track.attrib['name'].encode('utf-8'),
1347 })
1348 sub_formats.append({
1349 'url': 'https://www.youtube.com/api/timedtext?' + params,
1350 'ext': ext,
1351 })
1352 sub_lang_list[lang] = sub_formats
9f448fcb 1353 if has_live_chat_replay:
321bf820 1354 sub_lang_list['live_chat'] = [
1355 {
1356 'video_id': video_id,
1357 'ext': 'json',
1358 'protocol': 'youtube_live_chat_replay',
1359 },
9f448fcb 1360 ]
de7f3446 1361 if not sub_lang_list:
69ea8ca4 1362 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1363 return {}
1364 return sub_lang_list
1365
a72778d3
S
1366 def _get_ytplayer_config(self, video_id, webpage):
1367 patterns = (
526b3b07
S
1368 # User data may contain arbitrary character sequences that may affect
1369 # JSON extraction with regex, e.g. when '};' is contained the second
1370 # regex won't capture the whole JSON. Yet working around by trying more
1371 # concrete regex first keeping in mind proper quoted string handling
1372 # to be implemented in future that will replace this workaround (see
067aa17e
S
1373 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1374 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1375 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1376 r';ytplayer\.config\s*=\s*({.+?});',
1377 )
1378 config = self._search_regex(
1379 patterns, webpage, 'ytplayer.config', default=None)
1380 if config:
1381 return self._parse_json(
1382 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1383
29f7c58a 1384 def _get_automatic_captions(self, video_id, player_response, player_config):
de7f3446
JMF
1385 """We need the webpage for getting the captions url, pass it as an
1386 argument to speed up the process."""
69ea8ca4 1387 self.to_screen('%s: Looking for automatic captions' % video_id)
78caa52a 1388 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
29f7c58a 1389 if not (player_response or player_config):
de7f3446
JMF
1390 self._downloader.report_warning(err_msg)
1391 return {}
de7f3446 1392 try:
29f7c58a 1393 args = player_config.get('args') if player_config else {}
8bdd16b4 1394 caption_url = args.get('ttsurl')
1395 if caption_url:
b78b292f
S
1396 timestamp = args['timestamp']
1397 # We get the available subtitles
15707c7e 1398 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1399 'type': 'list',
1400 'tlangs': 1,
1401 'asrs': 1,
1402 })
1403 list_url = caption_url + '&' + list_params
1404 caption_list = self._download_xml(list_url, video_id)
1405 original_lang_node = caption_list.find('track')
1406 if original_lang_node is None:
1407 self._downloader.report_warning('Video doesn\'t have automatic captions')
1408 return {}
1409 original_lang = original_lang_node.attrib['lang_code']
1410 caption_kind = original_lang_node.attrib.get('kind', '')
1411
1412 sub_lang_list = {}
1413 for lang_node in caption_list.findall('target'):
1414 sub_lang = lang_node.attrib['lang_code']
1415 sub_formats = []
1416 for ext in self._SUBTITLE_FORMATS:
15707c7e 1417 params = compat_urllib_parse_urlencode({
b78b292f
S
1418 'lang': original_lang,
1419 'tlang': sub_lang,
1420 'fmt': ext,
1421 'ts': timestamp,
1422 'kind': caption_kind,
1423 })
1424 sub_formats.append({
1425 'url': caption_url + '&' + params,
1426 'ext': ext,
1427 })
1428 sub_lang_list[sub_lang] = sub_formats
1429 return sub_lang_list
1430
ddbb4c5c
S
1431 def make_captions(sub_url, sub_langs):
1432 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1433 caption_qs = compat_parse_qs(parsed_sub_url.query)
1434 captions = {}
1435 for sub_lang in sub_langs:
1436 sub_formats = []
1437 for ext in self._SUBTITLE_FORMATS:
1438 caption_qs.update({
1439 'tlang': [sub_lang],
1440 'fmt': [ext],
1441 })
1442 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1443 query=compat_urllib_parse_urlencode(caption_qs, True)))
1444 sub_formats.append({
1445 'url': sub_url,
1446 'ext': ext,
1447 })
1448 captions[sub_lang] = sub_formats
1449 return captions
1450
1451 # New captions format as of 22.06.2017
29f7c58a 1452 if player_response:
1453 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1454 base_url = renderer['captionTracks'][0]['baseUrl']
1455 sub_lang_list = []
1456 for lang in renderer['translationLanguages']:
1457 lang_code = lang.get('languageCode')
1458 if lang_code:
1459 sub_lang_list.append(lang_code)
1460 return make_captions(base_url, sub_lang_list)
59c5fa91 1461
8bdd16b4 1462 # Some videos don't provide ttsurl but rather caption_tracks and
1463 # caption_translation_languages (e.g. 20LmZk1hakA)
1464 # Does not used anymore as of 22.06.2017
1465 caption_tracks = args['caption_tracks']
1466 caption_translation_languages = args['caption_translation_languages']
1467 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1468 sub_lang_list = []
1469 for lang in caption_translation_languages.split(','):
1470 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1471 sub_lang = lang_qs.get('lc', [None])[0]
1472 if sub_lang:
1473 sub_lang_list.append(sub_lang)
1474 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1475 # An extractor error can be raise by the download process if there are
1476 # no automatic captions but there are subtitles
ddbb4c5c 1477 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1478 self._downloader.report_warning(err_msg)
1479 return {}
1480
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-stats URL so the video counts as watched.

    The URL is taken from player_response, falling back to video_info;
    nothing happens when neither provides a valid URL.  The request itself
    is non-fatal.
    """
    candidate = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not candidate:
        candidate = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(candidate)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in ``webpage``.

    Returns a list holding, in this order: HTML-unescaped embedded player
    URLs, HTML-unescaped ``data-youtube-id`` values of lazyYT elements,
    and the ``data-video_id`` values of "YouTube Video Importer" players.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for embed in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(embed.group('url')))

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall yields (q1, q2, video_id) tuples; the id is the last group
        entries.append(groups[-1])

    return entries
1538
@staticmethod
def _extract_url(webpage):
    """Return the first entry from ``YoutubeIE._extract_urls`` or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1543
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched against ``url``.

    Raises ExtractorError when ``url`` does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1551
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build a chapter list from the initial data embedded in ``webpage``.

    Returns None when there is no webpage, no usable initial data, or no
    chapter list in it. Otherwise returns a list of dicts with
    ``start_time``, ``end_time`` and ``title``. A chapter ends where the
    next one starts; the last chapter ends at ``duration``. Chapters
    whose start or end cannot be determined are left out.
    """
    if not webpage:
        return
    initial_data = self._extract_yt_initial_data(video_id, webpage)
    if not initial_data or not isinstance(initial_data, dict):
        return
    raw_chapters = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds; scale it down to seconds
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1596
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Build a chapter list from seek-link timestamps in an HTML description.

    ``description`` is scanned for lines containing a
    ``yt.www.watch.player.seekTo`` anchor whose text is a timestamp.
    Returns None when the description is empty or has no such lines,
    otherwise a list of dicts with ``start_time``, ``end_time`` and
    ``title``.

    ``duration`` may be None. In that case the duration-based clamping is
    skipped (comparing a number with None raises TypeError on Python 3)
    and the last chapter, whose end would be the duration, is omitted.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        # A timestamp beyond the end of the video means the remaining
        # lines are not real chapters.
        if duration is not None and start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration is not None and end_time > duration:
            end_time = duration
        # Timestamps that go backwards end the chapter list.
        if start_time > end_time:
            break
        # The title is the line without its timestamp link
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1631
84213ea8
S
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return chapters from the page JSON, else from the description.

    The description-based result is returned as-is whenever the JSON-based
    extraction yields nothing (None or an empty list).
    """
    json_chapters = self._extract_chapters_from_json(webpage, video_id, duration)
    if json_chapters:
        return json_chapters
    return self._extract_chapters_from_description(description, duration)
1635
c5e8d7af 1636 def _real_extract(self, url):
cf7e015f
S
1637 url, smuggled_data = unsmuggle_url(url, {})
1638
7e8c0af0 1639 proto = (
78caa52a
PH
1640 'http' if self._downloader.params.get('prefer_insecure', False)
1641 else 'https')
7e8c0af0 1642
7c80519c 1643 start_time = None
297a564b 1644 end_time = None
7c80519c
JMF
1645 parsed_url = compat_urllib_parse_urlparse(url)
1646 for component in [parsed_url.fragment, parsed_url.query]:
1647 query = compat_parse_qs(component)
297a564b 1648 if start_time is None and 't' in query:
7c80519c 1649 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1650 if start_time is None and 'start' in query:
1651 start_time = parse_duration(query['start'][0])
297a564b
JMF
1652 if end_time is None and 'end' in query:
1653 end_time = parse_duration(query['end'][0])
7c80519c 1654
c5e8d7af
PH
1655 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1656 mobj = re.search(self._NEXT_URL_RE, url)
1657 if mobj:
7fd002c0 1658 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1659 video_id = self.extract_id(url)
c5e8d7af
PH
1660
1661 # Get video webpage
aa79ac0c 1662 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1663 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1664
1665 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1666 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1667
1668 # Attempt to extract SWF player URL
e0df6211 1669 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1670 if mobj is not None:
1671 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1672 else:
1673 player_url = None
1674
d8d24a92
S
1675 dash_mpds = []
1676
1677 def add_dash_mpd(video_info):
1678 dash_mpd = video_info.get('dashmpd')
1679 if dash_mpd and dash_mpd[0] not in dash_mpds:
1680 dash_mpds.append(dash_mpd[0])
1681
561b456e
S
1682 def add_dash_mpd_pr(pl_response):
1683 dash_mpd = url_or_none(try_get(
1684 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1685 compat_str))
1686 if dash_mpd and dash_mpd not in dash_mpds:
1687 dash_mpds.append(dash_mpd)
1688
c7121fa7
S
1689 is_live = None
1690 view_count = None
1691
1692 def extract_view_count(v_info):
1693 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1694
c2d125d9
S
1695 def extract_player_response(player_response, video_id):
1696 pl_response = str_or_none(player_response)
1697 if not pl_response:
1698 return
1699 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1700 if isinstance(pl_response, dict):
1701 add_dash_mpd_pr(pl_response)
1702 return pl_response
1703
fb2c9277
U
1704 def extract_embedded_config(embed_webpage, video_id):
1705 embedded_config = self._search_regex(
1706 r'setConfig\(({.*})\);',
1707 embed_webpage, 'ytInitialData', default=None)
1708 if embedded_config:
1709 return embedded_config
1710
62d80ba1 1711 video_info = {}
dbdaaa23 1712 player_response = {}
62d80ba1 1713 ytplayer_config = None
1714 embed_webpage = None
dbdaaa23 1715
c5e8d7af 1716 # Get video info
39e7107d
U
1717 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1718 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1719 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1720 age_gate = True
1721 # We simulate the access to the video from www.youtube.com/v/{video_id}
1722 # this can be viewed without login into Youtube
beb95e77
CL
1723 url = proto + '://www.youtube.com/embed/%s' % video_id
1724 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
fb2c9277
U
1725 ext = extract_embedded_config(embed_webpage, video_id)
1726 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1727 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1728 if not playable_in_embed:
1729 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1730 playable_in_embed = ''
1731 else:
1732 playable_in_embed = playable_in_embed.group('playableinEmbed')
1733 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1734 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1735 if playable_in_embed == 'false':
c73baf23
U
1736 '''
1737 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1738 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1739 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1740 '''
1741 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1742 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1743 age_gate = False
1744 # Try looking directly into the video webpage
1745 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1746 if ytplayer_config:
59c5fa91
PO
1747 args = ytplayer_config.get("args")
1748 if args is not None:
1749 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1750 # Convert to the same format returned by compat_parse_qs
1751 video_info = dict((k, [v]) for k, v in args.items())
1752 add_dash_mpd(video_info)
1753 # Rental video is not rented but preview is available (e.g.
1754 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1755 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1756 if not video_info and args.get('ypc_vid'):
1757 return self.url_result(
1758 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1759 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1760 is_live = True
1761 if not player_response:
1762 player_response = extract_player_response(args.get('player_response'), video_id)
1763 elif not player_response:
1764 player_response = ytplayer_config
4bb9c880
U
1765 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1766 add_dash_mpd_pr(player_response)
9d9314cb
U
1767 else:
1768 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1769 else:
1770 data = compat_urllib_parse_urlencode({
1771 'video_id': video_id,
1772 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1773 'sts': self._search_regex(
1774 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1775 })
1776 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1777 try:
1778 video_info_webpage = self._download_webpage(
1779 video_info_url, video_id,
1780 note='Refetching age-gated info webpage',
1781 errnote='unable to download video info webpage')
1782 except ExtractorError:
1783 video_info_webpage = None
1784 if video_info_webpage:
1785 video_info = compat_parse_qs(video_info_webpage)
1786 pl_response = video_info.get('player_response', [None])[0]
1787 player_response = extract_player_response(pl_response, video_id)
1788 add_dash_mpd(video_info)
1789 view_count = extract_view_count(video_info)
c108eb73
JMF
1790 else:
1791 age_gate = False
d8d24a92 1792 # Try looking directly into the video webpage
a72778d3 1793 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
8bdd16b4 1794 if ytplayer_config:
1795 args = ytplayer_config.get('args', {})
4c76aa06 1796 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1797 # Convert to the same format returned by compat_parse_qs
1798 video_info = dict((k, [v]) for k, v in args.items())
1799 add_dash_mpd(video_info)
6496ccb4
S
1800 # Rental video is not rented but preview is available (e.g.
1801 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1802 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1803 if not video_info and args.get('ypc_vid'):
1804 return self.url_result(
1805 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1806 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1807 is_live = True
dbdaaa23 1808 if not player_response:
c2d125d9 1809 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1810 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1811 add_dash_mpd_pr(player_response)
bbb7c3f7 1812
8bdd16b4 1813 if not video_info and not player_response:
1814 player_response = extract_player_response(
1815 self._search_regex(
29f7c58a 1816 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1817 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
8bdd16b4 1818 'initial player response', default='{}'),
1819 video_id)
1820
bbb7c3f7 1821 def extract_unavailable_message():
0add33ab
S
1822 messages = []
1823 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1824 msg = self._html_search_regex(
1825 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1826 video_webpage, 'unavailable %s' % kind, default=None)
1827 if msg:
1828 messages.append(msg)
1829 if messages:
1830 return '\n'.join(messages)
bbb7c3f7 1831
f93abcf1 1832 if not video_info and not player_response:
15be3eb5
RA
1833 unavailable_message = extract_unavailable_message()
1834 if not unavailable_message:
1835 unavailable_message = 'Unable to extract video data'
1836 raise ExtractorError(
1837 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1838
f93abcf1
S
1839 if not isinstance(video_info, dict):
1840 video_info = {}
1841
5ac23244 1842 playable_in_embed = try_get(
1843 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1844
dbdaaa23
S
1845 video_details = try_get(
1846 player_response, lambda x: x['videoDetails'], dict) or {}
1847
37357d21
S
1848 microformat = try_get(
1849 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1850
8dbf751a
RA
1851 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1852 if not video_title:
cf7e015f
S
1853 self._downloader.report_warning('Unable to extract video title')
1854 video_title = '_'
1855
9cafc3fd 1856 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1857 if video_description:
fa4bc6e7
RA
1858
1859 def replace_url(m):
1860 redir_url = compat_urlparse.urljoin(url, m.group(1))
1861 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1862 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1863 qs = compat_parse_qs(parsed_redir_url.query)
1864 q = qs.get('q')
1865 if q and q[0]:
1866 return q[0]
1867 return redir_url
1868
9cafc3fd 1869 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1870 <a\s+
25cb7a0e 1871 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1872 (?:title|href)="([^"]+)"\s+
25cb7a0e 1873 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1874 class="[^"]*"[^>]*>
23f13e97 1875 [^<]+\.{3}\s*
cf7e015f 1876 </a>
fa4bc6e7 1877 ''', replace_url, video_description)
cf7e015f
S
1878 video_description = clean_html(video_description)
1879 else:
ea74e00b
DP
1880 video_description = video_details.get('shortDescription')
1881 if video_description is None:
1882 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 1883
8fe10494 1884 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1885 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1886 multifeed_metadata_list = try_get(
1887 player_response,
1888 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1889 compat_str) or try_get(
1890 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1891 if multifeed_metadata_list:
1892 entries = []
1893 feed_ids = []
1894 for feed in multifeed_metadata_list.split(','):
1895 # Unquote should take place before split on comma (,) since textual
1896 # fields may contain comma as well (see
067aa17e 1897 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1898 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1899
1900 def feed_entry(name):
1901 return try_get(feed_data, lambda x: x[name][0], compat_str)
1902
1903 feed_id = feed_entry('id')
1904 if not feed_id:
1905 continue
1906 feed_title = feed_entry('title')
1907 title = video_title
1908 if feed_title:
1909 title += ' (%s)' % feed_title
8fe10494
S
1910 entries.append({
1911 '_type': 'url_transparent',
1912 'ie_key': 'Youtube',
1913 'url': smuggle_url(
1914 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1915 {'force_singlefeed': True}),
6b09401b 1916 'title': title,
8fe10494 1917 })
6b09401b 1918 feed_ids.append(feed_id)
8fe10494
S
1919 self.to_screen(
1920 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1921 % (', '.join(feed_ids), video_id))
1922 return self.playlist_result(entries, video_id, video_title, video_description)
1923 else:
1924 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1925
c7121fa7 1926 if view_count is None:
1c9c8de2 1927 view_count = extract_view_count(video_info)
dbdaaa23
S
1928 if view_count is None and video_details:
1929 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
1930 if view_count is None and microformat:
1931 view_count = int_or_none(microformat.get('viewCount'))
1d699755 1932
27019dbb 1933 if is_live is None:
898238e9 1934 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1935
321bf820 1936 has_live_chat_replay = False
f0f76a33 1937 if not is_live:
82e3f6eb 1938 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
321bf820 1939 try:
1940 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1941 has_live_chat_replay = True
f0f76a33 1942 except (KeyError, IndexError, TypeError):
321bf820 1943 pass
1944
c5e8d7af
PH
1945 # Check for "rental" videos
1946 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1947 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1948
c63ca0ee
S
1949 def _extract_filesize(media_url):
1950 return int_or_none(self._search_regex(
1951 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1952
bf1317d2
S
1953 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1954 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1955
c5e8d7af
PH
1956 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1957 self.report_rtmp_download()
dd27fd17
PH
1958 formats = [{
1959 'format_id': '_rtmp',
1960 'protocol': 'rtmp',
1961 'url': video_info['conn'][0],
1962 'player_url': player_url,
1963 }]
bf1317d2 1964 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1965 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1966 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1967 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1968 formats = []
3318832e 1969 formats_spec = {}
82156fdb 1970 fmt_list = video_info.get('fmt_list', [''])[0]
1971 if fmt_list:
1972 for fmt in fmt_list.split(','):
1973 spec = fmt.split('/')
3318832e 1974 if len(spec) > 1:
1975 width_height = spec[1].split('x')
1976 if len(width_height) == 2:
1977 formats_spec[spec[0]] = {
1978 'resolution': spec[1],
1979 'width': int_or_none(width_height[0]),
1980 'height': int_or_none(width_height[1]),
1981 }
bf1317d2
S
1982 for fmt in streaming_formats:
1983 itag = str_or_none(fmt.get('itag'))
1984 if not itag:
201e9eaa 1985 continue
bf1317d2
S
1986 quality = fmt.get('quality')
1987 quality_label = fmt.get('qualityLabel') or quality
1988 formats_spec[itag] = {
1989 'asr': int_or_none(fmt.get('audioSampleRate')),
1990 'filesize': int_or_none(fmt.get('contentLength')),
1991 'format_note': quality_label,
1992 'fps': int_or_none(fmt.get('fps')),
1993 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1994 # bitrate for itag 43 is always 2147483647
1995 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1996 'width': int_or_none(fmt.get('width')),
1997 }
1998
1999 for fmt in streaming_formats:
00eb865b 2000 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2001 continue
2002 url = url_or_none(fmt.get('url'))
2003
2004 if not url:
fa3db383 2005 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2006 if not cipher:
2007 continue
2008 url_data = compat_parse_qs(cipher)
2009 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2010 if not url:
2011 continue
2012 else:
2013 cipher = None
2014 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2015
2f483bc1
S
2016 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2017 # Unsupported FORMAT_STREAM_TYPE_OTF
2018 if stream_type == 3:
2019 continue
6449cd80 2020
bf1317d2
S
2021 format_id = fmt.get('itag') or url_data['itag'][0]
2022 if not format_id:
2023 continue
2024 format_id = compat_str(format_id)
a49eccdf 2025
bf1317d2
S
2026 if cipher:
2027 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
8bdd16b4 2028 ASSETS_RE = (
2029 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2030 r'"jsUrl"\s*:\s*("[^"]+")',
2031 r'"assets":.+?"js":\s*("[^"]+")')
bf1317d2
S
2032 jsplayer_url_json = self._search_regex(
2033 ASSETS_RE,
2034 embed_webpage if age_gate else video_webpage,
2035 'JS player URL (1)', default=None)
2036 if not jsplayer_url_json and not age_gate:
2037 # We need the embed website after all
2038 if embed_webpage is None:
2039 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2040 embed_webpage = self._download_webpage(
2041 embed_url, video_id, 'Downloading embed webpage')
2042 jsplayer_url_json = self._search_regex(
2043 ASSETS_RE, embed_webpage, 'JS player URL')
2044
2045 player_url = json.loads(jsplayer_url_json)
cf010131 2046 if player_url is None:
bf1317d2
S
2047 player_url_json = self._search_regex(
2048 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2049 video_webpage, 'age gate player URL')
2050 player_url = json.loads(player_url_json)
2051
2052 if 'sig' in url_data:
2053 url += '&signature=' + url_data['sig'][0]
2054 elif 's' in url_data:
2055 encrypted_sig = url_data['s'][0]
2056
2057 if self._downloader.params.get('verbose'):
2058 if player_url is None:
bf1317d2 2059 player_desc = 'unknown'
cf010131 2060 else:
e40c758c
S
2061 player_type, player_version = self._extract_player_info(player_url)
2062 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2063 parts_sizes = self._signature_cache_id(encrypted_sig)
2064 self.to_screen('{%s} signature length %s, %s' %
2065 (format_id, parts_sizes, player_desc))
2066
2067 signature = self._decrypt_signature(
2068 encrypted_sig, video_id, player_url, age_gate)
2069 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2070 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2071 if 'ratebypass' not in url:
2072 url += '&ratebypass=yes'
c9afb51c 2073
94278f72
YCH
2074 dct = {
2075 'format_id': format_id,
2076 'url': url,
2077 'player_url': player_url,
2078 }
2079 if format_id in self._formats:
2080 dct.update(self._formats[format_id])
3318832e 2081 if format_id in formats_spec:
2082 dct.update(formats_spec[format_id])
94278f72 2083
aabc2be6 2084 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2085 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2086 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2087 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2088 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2089
bf1317d2
S
2090 if width is None:
2091 width = int_or_none(fmt.get('width'))
2092 if height is None:
2093 height = int_or_none(fmt.get('height'))
2094
c63ca0ee
S
2095 filesize = int_or_none(url_data.get(
2096 'clen', [None])[0]) or _extract_filesize(url)
2097
bf1317d2
S
2098 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2099 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2100
4878759f
S
2101 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2102 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2103 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2104
94278f72 2105 more_fields = {
c63ca0ee 2106 'filesize': filesize,
bf1317d2 2107 'tbr': tbr,
c9afb51c
AH
2108 'width': width,
2109 'height': height,
bf1317d2
S
2110 'fps': fps,
2111 'format_note': quality_label or quality,
c9afb51c 2112 }
94278f72
YCH
2113 for key, value in more_fields.items():
2114 if value:
2115 dct[key] = value
bf1317d2 2116 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2117 if type_:
2118 type_split = type_.split(';')
2119 kind_ext = type_split[0].split('/')
2120 if len(kind_ext) == 2:
94278f72
YCH
2121 kind, _ = kind_ext
2122 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2123 if kind in ('audio', 'video'):
2124 codecs = None
2125 for mobj in re.finditer(
2126 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2127 if mobj.group('key') == 'codecs':
2128 codecs = mobj.group('val')
2129 break
2130 if codecs:
6310acf5 2131 dct.update(parse_codecs(codecs))
e4a60912
S
2132 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2133 dct['downloader_options'] = {
2134 # Youtube throttles chunks >~10M
2135 'http_chunk_size': 10485760,
2136 }
aabc2be6 2137 formats.append(dct)
c5e8d7af 2138 else:
c3e54389
S
2139 manifest_url = (
2140 url_or_none(try_get(
2141 player_response,
2142 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2143 compat_str))
2144 or url_or_none(try_get(
c3e54389
S
2145 video_info, lambda x: x['hlsvp'][0], compat_str)))
2146 if manifest_url:
2147 formats = []
2148 m3u8_formats = self._extract_m3u8_formats(
2149 manifest_url, video_id, 'mp4', fatal=False)
2150 for a_format in m3u8_formats:
2151 itag = self._search_regex(
2152 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2153 if itag:
2154 a_format['format_id'] = itag
2155 if itag in self._formats:
2156 dct = self._formats[itag].copy()
2157 dct.update(a_format)
2158 a_format = dct
2159 a_format['player_url'] = player_url
2160 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2161 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2162 if self._downloader.params.get('youtube_include_hls_manifest', True):
2163 formats.append(a_format)
c3e54389 2164 else:
13577349 2165 error_message = extract_unavailable_message()
a0566bbf 2166 if not error_message:
2167 reason_list = try_get(
2168 player_response,
2169 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2170 list) or []
2171 for reason in reason_list:
2172 if not isinstance(reason, dict):
2173 continue
2174 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2175 if reason_text:
2176 if not error_message:
2177 error_message = ''
2178 error_message += reason_text
2179 if error_message:
2180 error_message = clean_html(error_message)
c3e54389 2181 if not error_message:
13577349
S
2182 error_message = clean_html(try_get(
2183 player_response, lambda x: x['playabilityStatus']['reason'],
2184 compat_str))
2185 if not error_message:
2186 error_message = clean_html(
2187 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2188 if error_message:
2189 raise ExtractorError(error_message, expected=True)
2190 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2191
7e72694b 2192 # uploader
dbdaaa23
S
2193 video_uploader = try_get(
2194 video_info, lambda x: x['author'][0],
2195 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2196 if video_uploader:
2197 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2198 else:
2199 self._downloader.report_warning('unable to extract uploader name')
2200
2201 # uploader_id
2202 video_uploader_id = None
2203 video_uploader_url = None
2204 mobj = re.search(
2205 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2206 video_webpage)
2207 if mobj is not None:
2208 video_uploader_id = mobj.group('uploader_id')
2209 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2210 else:
2211 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2212 if owner_profile_url:
2213 video_uploader_id = self._search_regex(
2214 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2215 default=None)
2216 video_uploader_url = owner_profile_url
7e72694b 2217
b45a9e69 2218 channel_id = (
3089bc74
S
2219 str_or_none(video_details.get('channelId'))
2220 or self._html_search_meta(
2221 'channelId', video_webpage, 'channel id', default=None)
2222 or self._search_regex(
b45a9e69 2223 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2224 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2225 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2226
b477fc13
S
2227 thumbnails = []
2228 thumbnails_list = try_get(
2229 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2230 for t in thumbnails_list:
2231 if not isinstance(t, dict):
2232 continue
2233 thumbnail_url = url_or_none(t.get('url'))
2234 if not thumbnail_url:
2235 continue
2236 thumbnails.append({
2237 'url': thumbnail_url,
2238 'width': int_or_none(t.get('width')),
2239 'height': int_or_none(t.get('height')),
2240 })
2241
2242 if not thumbnails:
7e72694b 2243 video_thumbnail = None
b477fc13
S
2244 # We try first to get a high quality image:
2245 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2246 video_webpage, re.DOTALL)
2247 if m_thumb is not None:
2248 video_thumbnail = m_thumb.group(1)
2249 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2250 if thumbnail_url:
2251 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2252 if video_thumbnail:
2253 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2254
2255 # upload date
2256 upload_date = self._html_search_meta(
2257 'datePublished', video_webpage, 'upload date', default=None)
2258 if not upload_date:
2259 upload_date = self._search_regex(
2260 [r'(?s)id="eow-date.*?>(.*?)</span>',
2261 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2262 video_webpage, 'upload date', default=None)
37357d21
S
2263 if not upload_date:
2264 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2265 upload_date = unified_strdate(upload_date)
2266
2267 video_license = self._html_search_regex(
2268 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2269 video_webpage, 'license', default=None)
2270
2271 m_music = re.search(
2272 r'''(?x)
2273 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2274 <ul[^>]*>\s*
2275 <li>(?P<title>.+?)
2276 by (?P<creator>.+?)
2277 (?:
2278 \(.+?\)|
2279 <a[^>]*
2280 (?:
2281 \bhref=["\']/red[^>]*>| # drop possible
2282 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2283 )
2284 .*?
2285 )?</li
2286 ''',
2287 video_webpage)
2288 if m_music:
2289 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2290 video_creator = clean_html(m_music.group('creator'))
2291 else:
2292 video_alt_title = video_creator = None
2293
2294 def extract_meta(field):
2295 return self._html_search_regex(
2296 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2297 video_webpage, field, default=None)
2298
2299 track = extract_meta('Song')
2300 artist = extract_meta('Artist')
92bc97d3 2301 album = extract_meta('Album')
822b9d9c
RA
2302
2303 # Youtube Music Auto-generated description
92bc97d3 2304 release_date = release_year = None
822b9d9c 2305 if video_description:
38d70284 2306 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c
RA
2307 if mobj:
2308 if not track:
2309 track = mobj.group('track').strip()
2310 if not artist:
2311 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2312 if not album:
2313 album = mobj.group('album'.strip())
822b9d9c
RA
2314 release_year = mobj.group('release_year')
2315 release_date = mobj.group('release_date')
2316 if release_date:
2317 release_date = release_date.replace('-', '')
2318 if not release_year:
2319 release_year = int(release_date[:4])
2320 if release_year:
2321 release_year = int(release_year)
7e72694b 2322
38d70284 2323 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2324 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2325 for content in contents:
2326 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2327 multiple_songs = False
2328 for row in rows:
2329 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2330 multiple_songs = True
2331 break
2332 for row in rows:
2333 mrr = row.get('metadataRowRenderer') or {}
2334 mrr_title = try_get(
2335 mrr, lambda x: x['title']['simpleText'], compat_str)
2336 mrr_contents = try_get(
2337 mrr, lambda x: x['contents'][0], dict) or {}
2338 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2339 if not (mrr_title and mrr_contents_text):
2340 continue
2341 if mrr_title == 'License':
2342 video_license = mrr_contents_text
2343 elif not multiple_songs:
2344 if mrr_title == 'Album':
2345 album = mrr_contents_text
2346 elif mrr_title == 'Artist':
2347 artist = mrr_contents_text
2348 elif mrr_title == 'Song':
2349 track = mrr_contents_text
9322f116 2350
7e72694b
S
2351 m_episode = re.search(
2352 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2353 video_webpage)
2354 if m_episode:
c2dd2dc0 2355 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2356 season_number = int(m_episode.group('season'))
2357 episode_number = int(m_episode.group('episode'))
2358 else:
2359 series = season_number = episode_number = None
2360
2361 m_cat_container = self._search_regex(
2362 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2363 video_webpage, 'categories', default=None)
dbeafce5 2364 category = None
7e72694b
S
2365 if m_cat_container:
2366 category = self._html_search_regex(
2367 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2368 default=None)
dbeafce5
S
2369 if not category:
2370 category = try_get(
2371 microformat, lambda x: x['category'], compat_str)
2372 video_categories = None if category is None else [category]
7e72694b
S
2373
2374 video_tags = [
2375 unescapeHTML(m.group('content'))
2376 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2377 if not video_tags:
2378 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2379
2380 def _extract_count(count_name):
2381 return str_to_int(self._search_regex(
a0566bbf 2382 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2383 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
7e72694b
S
2384 video_webpage, count_name, default=None))
2385
2386 like_count = _extract_count('like')
2387 dislike_count = _extract_count('dislike')
2388
dbdaaa23
S
2389 if view_count is None:
2390 view_count = str_to_int(self._search_regex(
2391 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2392 'view count', default=None))
2393
bf3c9326
S
2394 average_rating = (
2395 float_or_none(video_details.get('averageRating'))
2396 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2397
7e72694b 2398 # subtitles
321bf820 2399 video_subtitles = self.extract_subtitles(
2400 video_id, video_webpage, has_live_chat_replay)
29f7c58a 2401 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
7e72694b
S
2402
2403 video_duration = try_get(
2404 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2405 if not video_duration:
2406 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2407 if not video_duration:
2408 video_duration = parse_duration(self._html_search_meta(
2409 'duration', video_webpage, 'video duration'))
2410
b84071c0
JP
2411 # Get Subscriber Count of channel
2412 subscriber_count = parse_count(self._search_regex(
2413 r'"text":"([\d\.]+\w?) subscribers"',
2414 video_webpage,
2415 'subscriber count',
2416 default=None
2417 ))
2418
06167fbb 2419 # get xsrf for annotations or comments
2420 get_annotations = self._downloader.params.get('writeannotations', False)
2421 get_comments = self._downloader.params.get('getcomments', False)
2422 if get_annotations or get_comments:
29f7c58a 2423 xsrf_token = None
2424 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2425 if ytcfg:
2426 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2427 if not xsrf_token:
2428 xsrf_token = self._search_regex(
2429 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2430 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2431
2432 # annotations
2433 video_annotations = None
2434 if get_annotations:
64b6a4e9
RA
2435 invideo_url = try_get(
2436 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2437 if xsrf_token and invideo_url:
29f7c58a 2438 xsrf_field_name = None
2439 if ytcfg:
2440 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2441 if not xsrf_field_name:
2442 xsrf_field_name = self._search_regex(
2443 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2444 video_webpage, 'xsrf field name',
2445 group='xsrf_field_name', default='session_token')
64b6a4e9
RA
2446 video_annotations = self._download_webpage(
2447 self._proto_relative_url(invideo_url),
2448 video_id, note='Downloading annotations',
2449 errnote='Unable to download video annotations', fatal=False,
2450 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2451
84213ea8 2452 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2453
06167fbb 2454 # Get comments
2455 # TODO: Refactor and move to seperate function
2456 if get_comments:
2457 expected_video_comment_count = 0
2458 video_comments = []
2459
def find_value(html, key, num_chars=2, separator='"'):
    """Cut a value out of *html* that follows the first occurrence of *key*.

    The value starts *num_chars* characters after the end of *key* and
    stops right before the next *separator*.  No validation is done: the
    offsets returned by ``str.find`` are used as-is, including ``-1`` when
    *key* or *separator* is missing.
    """
    start = html.find(key) + len(key) + num_chars
    return html[start:html.find(separator, start)]
2464
def search_dict(partial, key):
    """Lazily yield every value stored under *key* in a nested structure.

    Dicts and lists are walked recursively.  A value found under *key* is
    yielded as-is and not searched any further; anything that is neither
    a dict nor a list yields nothing.
    """
    if isinstance(partial, dict):
        for name, value in partial.items():
            if name != key:
                # Not a match: keep looking deeper inside this value
                for found in search_dict(value, key):
                    yield found
                continue
            yield value
        return
    if not isinstance(partial, list):
        return
    for element in partial:
        for found in search_dict(element, key):
            yield found
2477
2478 try:
2479 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
8d0ea5f9 2480 continuations = [ncd['continuation']]
06167fbb 2481 # Handle videos where comments have been disabled entirely
2482 except StopIteration:
2483 continuations = []
2484
8d0ea5f9 2485 def get_continuation(continuation, session_token, replies=False):
06167fbb 2486 query = {
66c935fb 2487 'pbj': 1,
2488 'ctoken': continuation,
06167fbb 2489 }
2490 if replies:
2491 query['action_get_comment_replies'] = 1
2492 else:
2493 query['action_get_comments'] = 1
2494
2495 while True:
2496 content, handle = self._download_webpage_handle(
2497 'https://www.youtube.com/comment_service_ajax',
2498 video_id,
2499 note=False,
2500 expected_status=[413],
2501 data=urlencode_postdata({
2502 'session_token': session_token
2503 }),
2504 query=query,
2505 headers={
2506 'Accept': '*/*',
2507 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2508 'X-YouTube-Client-Name': '1',
2509 'X-YouTube-Client-Version': '2.20201202.06.01'
2510 }
2511 )
2512
2513 response_code = handle.getcode()
2514 if (response_code == 200):
2515 return self._parse_json(content, video_id)
8d0ea5f9 2516 if (response_code == 413):
06167fbb 2517 return None
2518 raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2519
2520 first_continuation = True
2521 while continuations:
2522 continuation, itct = continuations.pop()
8d0ea5f9 2523 comment_response = get_continuation(continuation, xsrf_token)
06167fbb 2524 if not comment_response:
2525 continue
2526 if list(search_dict(comment_response, 'externalErrorMessage')):
2527 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2528
8d0ea5f9
B
2529 if 'continuationContents' not in comment_response['response']:
2530 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2531 continue
2532 # not sure if this actually helps
2533 if 'xsrf_token' in comment_response:
2534 xsrf_token = comment_response['xsrf_token']
2535
06167fbb 2536 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2537 if first_continuation:
2538 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2539 first_continuation = False
2540 if 'contents' not in item_section:
2541 # continuation returned no comments?
2542 # set an empty array as to not break the for loop
2543 item_section['contents'] = []
2544
2545 for meta_comment in item_section['contents']:
2546 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2547 video_comments.append({
2548 'id': comment['commentId'],
2549 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
8d0ea5f9 2550 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2551 'author': comment.get('authorText', {}).get('simpleText', ''),
2552 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2553 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2554 'parent': 'root'
2555 })
2556 if 'replies' not in meta_comment['commentThreadRenderer']:
2557 continue
2558
8d0ea5f9
B
2559 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2560 while reply_continuations:
06167fbb 2561 time.sleep(1)
8d0ea5f9
B
2562 continuation = reply_continuations.pop()
2563 replies_data = get_continuation(continuation, xsrf_token, True)
06167fbb 2564 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2565 continue
06167fbb 2566
2567 if self._downloader.params.get('verbose', False):
2568 self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2569 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2570 for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2571 reply_comment = reply_meta['commentRenderer']
2572 video_comments.append({
2573 'id': reply_comment['commentId'],
2574 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2575 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2576 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2577 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2578 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2579 'parent': comment['commentId']
2580 })
2581 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2582 continue
06167fbb 2583
8d0ea5f9 2584 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2585
2586 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2587
2588 if 'continuations' in item_section:
8d0ea5f9 2589 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2590 time.sleep(1)
2591
2592 self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2593 else:
2594 expected_video_comment_count = None
2595 video_comments = None
2596
dd27fd17 2597 # Look for the DASH manifest
203fb43f 2598 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2599 dash_mpd_fatal = True
8ff648e4 2600 for mpd_url in dash_mpds:
d8d24a92 2601 dash_formats = {}
774e208f 2602 try:
05d0d131
YCH
2603 def decrypt_sig(mobj):
2604 s = mobj.group(1)
2605 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2606 return '/signature/%s' % dec_s
2607
8ff648e4 2608 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2609
8ff648e4 2610 for df in self._extract_mpd_formats(
2611 mpd_url, video_id, fatal=dash_mpd_fatal,
2612 formats_dict=self._formats):
c63ca0ee
S
2613 if not df.get('filesize'):
2614 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2615 # Do not overwrite DASH format found in some previous DASH manifest
2616 if df['format_id'] not in dash_formats:
2617 dash_formats[df['format_id']] = df
77c6fb5b
S
2618 # Additional DASH manifests may end up in HTTP Error 403 therefore
2619 # allow them to fail without bug report message if we already have
2620 # some DASH manifest succeeded. This is temporary workaround to reduce
2621 # burst of bug reports until we figure out the reason and whether it
2622 # can be fixed at all.
2623 dash_mpd_fatal = False
774e208f
PH
2624 except (ExtractorError, KeyError) as e:
2625 self.report_warning(
2626 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2627 if dash_formats:
04b3b3df
JMF
2628 # Remove the formats we found through non-DASH, they
2629 # contain less info and it can be wrong, because we use
2630 # fixed values (for example the resolution). See
067aa17e 2631 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2632 # example.
d80265cc 2633 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2634 formats.extend(dash_formats.values())
d80044c2 2635
6271f1ca
PH
2636 # Check for malformed aspect ratio
2637 stretched_m = re.search(
2638 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2639 video_webpage)
2640 if stretched_m:
313dfc45
LL
2641 w = float(stretched_m.group('w'))
2642 h = float(stretched_m.group('h'))
5faf9fed
S
2643 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2644 # We will only process correct ratios.
313dfc45 2645 if w > 0 and h > 0:
41f24c32 2646 ratio = w / h
313dfc45
LL
2647 for f in formats:
2648 if f.get('vcodec') != 'none':
2649 f['stretched_ratio'] = ratio
6271f1ca 2650
026fbedc 2651 if not formats:
43ebf77d
S
2652 if 'reason' in video_info:
2653 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2654 regions_allowed = self._html_search_meta(
2655 'regionsAllowed', video_webpage, default=None)
2656 countries = regions_allowed.split(',') if regions_allowed else None
2657 self.raise_geo_restricted(
2658 msg=video_info['reason'][0], countries=countries)
2659 reason = video_info['reason'][0]
2660 if 'Invalid parameters' in reason:
2661 unavailable_message = extract_unavailable_message()
2662 if unavailable_message:
2663 reason = unavailable_message
2664 raise ExtractorError(
2665 'YouTube said: %s' % reason,
2666 expected=True, video_id=video_id)
2667 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2668 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2669
4bcc7bd1 2670 self._sort_formats(formats)
4ea3be0a 2671
21c340b8 2672 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2673
4ea3be0a 2674 return {
8bcc8756
JW
2675 'id': video_id,
2676 'uploader': video_uploader,
2677 'uploader_id': video_uploader_id,
fd050249 2678 'uploader_url': video_uploader_url,
b60419c5 2679 'channel': video_uploader,
dd4c4492
S
2680 'channel_id': channel_id,
2681 'channel_url': channel_url,
8bcc8756 2682 'upload_date': upload_date,
7caf9830 2683 'license': video_license,
936784b2 2684 'creator': video_creator or artist,
8bcc8756 2685 'title': video_title,
936784b2 2686 'alt_title': video_alt_title or track,
b477fc13 2687 'thumbnails': thumbnails,
8bcc8756
JW
2688 'description': video_description,
2689 'categories': video_categories,
000b6b5a 2690 'tags': video_tags,
8bcc8756 2691 'subtitles': video_subtitles,
360e1ca5 2692 'automatic_captions': automatic_captions,
8bcc8756
JW
2693 'duration': video_duration,
2694 'age_limit': 18 if age_gate else 0,
2695 'annotations': video_annotations,
9cafc3fd 2696 'chapters': chapters,
7e8c0af0 2697 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2698 'view_count': view_count,
4ea3be0a 2699 'like_count': like_count,
2700 'dislike_count': dislike_count,
bf3c9326 2701 'average_rating': average_rating,
8bcc8756 2702 'formats': formats,
2fe1ff85 2703 'is_live': is_live,
7c80519c 2704 'start_time': start_time,
297a564b 2705 'end_time': end_time,
12afdc2a
S
2706 'series': series,
2707 'season_number': season_number,
2708 'episode_number': episode_number,
936784b2
S
2709 'track': track,
2710 'artist': artist,
5caabd3c 2711 'album': album,
2712 'release_date': release_date,
2713 'release_year': release_year,
b84071c0 2714 'subscriber_count': subscriber_count,
5ac23244 2715 'playable_in_embed': playable_in_embed,
06167fbb 2716 'comments': video_comments,
2717 'comment_count': expected_video_comment_count,
4ea3be0a 2718 }
c5e8d7af 2719
5f6a1245 2720
8bdd16b4 2721class YoutubeTabIE(YoutubeBaseInfoExtractor):
2722 IE_DESC = 'YouTube.com tab'
70d5c17b 2723 _VALID_URL = r'''(?x)
2724 https?://
2725 (?:\w+\.)?
2726 (?:
2727 youtube(?:kids)?\.com|
2728 invidio\.us
2729 )/
2730 (?:
2731 (?:channel|c|user)/|
2732 (?P<not_channel>
3d3dddc9 2733 feed/|
70d5c17b 2734 (?:playlist|watch)\?.*?\blist=
2735 )|
29f7c58a 2736 (?!(?:%s)\b) # Direct URLs
70d5c17b 2737 )
2738 (?P<id>[^/?\#&]+)
2739 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2740 IE_NAME = 'youtube:tab'
2741
81127aa5 2742 _TESTS = [{
8bdd16b4 2743 # playlists, multipage
2744 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2745 'playlist_mincount': 94,
2746 'info_dict': {
2747 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2748 'title': 'Игорь Клейнер - Playlists',
2749 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2750 },
2751 }, {
2752 # playlists, multipage, different order
2753 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2754 'playlist_mincount': 94,
2755 'info_dict': {
2756 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2757 'title': 'Игорь Клейнер - Playlists',
2758 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2759 },
2760 }, {
2761 # playlists, singlepage
2762 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2763 'playlist_mincount': 4,
2764 'info_dict': {
2765 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2766 'title': 'ThirstForScience - Playlists',
2767 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2768 }
2769 }, {
2770 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2771 'only_matching': True,
2772 }, {
2773 # basic, single video playlist
0e30a7b9 2774 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2775 'info_dict': {
0e30a7b9 2776 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2777 'uploader': 'Sergey M.',
2778 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2779 'title': 'youtube-dl public playlist',
81127aa5 2780 },
0e30a7b9 2781 'playlist_count': 1,
9291475f 2782 }, {
8bdd16b4 2783 # empty playlist
0e30a7b9 2784 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2785 'info_dict': {
0e30a7b9 2786 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2787 'uploader': 'Sergey M.',
2788 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2789 'title': 'youtube-dl empty playlist',
9291475f
PH
2790 },
2791 'playlist_count': 0,
2792 }, {
8bdd16b4 2793 # Home tab
2794 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2795 'info_dict': {
8bdd16b4 2796 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2797 'title': 'lex will - Home',
2798 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2799 },
8bdd16b4 2800 'playlist_mincount': 2,
9291475f 2801 }, {
8bdd16b4 2802 # Videos tab
2803 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2804 'info_dict': {
8bdd16b4 2805 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2806 'title': 'lex will - Videos',
2807 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2808 },
8bdd16b4 2809 'playlist_mincount': 975,
9291475f 2810 }, {
8bdd16b4 2811 # Videos tab, sorted by popular
2812 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2813 'info_dict': {
8bdd16b4 2814 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2815 'title': 'lex will - Videos',
2816 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2817 },
8bdd16b4 2818 'playlist_mincount': 199,
9291475f 2819 }, {
8bdd16b4 2820 # Playlists tab
2821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2822 'info_dict': {
8bdd16b4 2823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2824 'title': 'lex will - Playlists',
2825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2826 },
8bdd16b4 2827 'playlist_mincount': 17,
ac7553d0 2828 }, {
8bdd16b4 2829 # Community tab
2830 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2831 'info_dict': {
8bdd16b4 2832 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2833 'title': 'lex will - Community',
2834 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2835 },
2836 'playlist_mincount': 18,
87dadd45 2837 }, {
8bdd16b4 2838 # Channels tab
2839 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2840 'info_dict': {
8bdd16b4 2841 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2842 'title': 'lex will - Channels',
2843 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2844 },
2845 'playlist_mincount': 138,
6b08cdf6 2846 }, {
a0566bbf 2847 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2848 'only_matching': True,
2849 }, {
a0566bbf 2850 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2851 'only_matching': True,
2852 }, {
a0566bbf 2853 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2854 'only_matching': True,
2855 }, {
2856 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2857 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2858 'info_dict': {
2859 'title': '29C3: Not my department',
2860 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2861 'uploader': 'Christiaan008',
2862 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2863 },
2864 'playlist_count': 96,
2865 }, {
2866 'note': 'Large playlist',
2867 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2868 'info_dict': {
8bdd16b4 2869 'title': 'Uploads from Cauchemar',
2870 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2871 'uploader': 'Cauchemar',
2872 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2873 },
8bdd16b4 2874 'playlist_mincount': 1123,
2875 }, {
2876 # even larger playlist, 8832 videos
2877 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2878 'only_matching': True,
4b7df0d3
JMF
2879 }, {
2880 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2881 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2882 'info_dict': {
acf757f4
PH
2883 'title': 'Uploads from Interstellar Movie',
2884 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2885 'uploader': 'Interstellar Movie',
8bdd16b4 2886 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2887 },
481cc733 2888 'playlist_mincount': 21,
8bdd16b4 2889 }, {
2890 # https://github.com/ytdl-org/youtube-dl/issues/21844
2891 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2892 'info_dict': {
2893 'title': 'Data Analysis with Dr Mike Pound',
2894 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2895 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2896 'uploader': 'Computerphile',
2897 },
2898 'playlist_mincount': 11,
2899 }, {
a0566bbf 2900 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2901 'only_matching': True,
dacb3a86
S
2902 }, {
2903 # Playlist URL that does not actually serve a playlist
2904 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2905 'info_dict': {
2906 'id': 'FqZTN594JQw',
2907 'ext': 'webm',
2908 'title': "Smiley's People 01 detective, Adventure Series, Action",
2909 'uploader': 'STREEM',
2910 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2912 'upload_date': '20150526',
2913 'license': 'Standard YouTube License',
2914 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2915 'categories': ['People & Blogs'],
2916 'tags': list,
dbdaaa23 2917 'view_count': int,
dacb3a86
S
2918 'like_count': int,
2919 'dislike_count': int,
2920 },
2921 'params': {
2922 'skip_download': True,
2923 },
13a75688 2924 'skip': 'This video is not available.',
dacb3a86 2925 'add_ie': [YoutubeIE.ie_key()],
481cc733 2926 }, {
8bdd16b4 2927 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2928 'only_matching': True,
66b48727 2929 }, {
8bdd16b4 2930 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2931 'only_matching': True,
a0566bbf 2932 }, {
2933 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2934 'info_dict': {
2935 'id': '9Auq9mYxFEE',
2936 'ext': 'mp4',
2937 'title': 'Watch Sky News live',
2938 'uploader': 'Sky News',
2939 'uploader_id': 'skynews',
2940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2941 'upload_date': '20191102',
2942 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2943 'categories': ['News & Politics'],
2944 'tags': list,
2945 'like_count': int,
2946 'dislike_count': int,
2947 },
2948 'params': {
2949 'skip_download': True,
2950 },
2951 }, {
2952 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2953 'info_dict': {
2954 'id': 'a48o2S1cPoo',
2955 'ext': 'mp4',
2956 'title': 'The Young Turks - Live Main Show',
2957 'uploader': 'The Young Turks',
2958 'uploader_id': 'TheYoungTurks',
2959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2960 'upload_date': '20150715',
2961 'license': 'Standard YouTube License',
2962 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2963 'categories': ['News & Politics'],
2964 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2965 'like_count': int,
2966 'dislike_count': int,
2967 },
2968 'params': {
2969 'skip_download': True,
2970 },
2971 'only_matching': True,
2972 }, {
2973 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2974 'only_matching': True,
2975 }, {
2976 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2977 'only_matching': True,
3d3dddc9 2978 }, {
2979 'url': 'https://www.youtube.com/feed/trending',
2980 'only_matching': True,
2981 }, {
2982 # needs auth
2983 'url': 'https://www.youtube.com/feed/library',
2984 'only_matching': True,
2985 }, {
2986 # needs auth
2987 'url': 'https://www.youtube.com/feed/history',
2988 'only_matching': True,
2989 }, {
2990 # needs auth
2991 'url': 'https://www.youtube.com/feed/subscriptions',
2992 'only_matching': True,
2993 }, {
2994 # needs auth
2995 'url': 'https://www.youtube.com/feed/watch_later',
2996 'only_matching': True,
2997 }, {
2998 # no longer available?
2999 'url': 'https://www.youtube.com/feed/recommended',
3000 'only_matching': True,
29f7c58a 3001 }, {
3002 # inline playlist with not always working continuations
3003 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3004 'only_matching': True,
3005 }, {
3006 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3007 'only_matching': True,
3008 }, {
3009 'url': 'https://www.youtube.com/course',
3010 'only_matching': True,
3011 }, {
3012 'url': 'https://www.youtube.com/zsecurity',
3013 'only_matching': True,
3014 }, {
3015 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3016 'only_matching': True,
3017 }, {
3018 'url': 'https://www.youtube.com/TheYoungTurks/live',
3019 'only_matching': True,
3020 }]
3021
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE considers suitable are always rejected here;
    for all other URLs the decision is left to the parent implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3026
def _extract_channel_id(self, webpage):
    """Extract the channel ID from *webpage*.

    The ``channelId`` meta tag is used when present.  Otherwise a channel
    URL is looked up in several URL-carrying meta tags and the ID is
    taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to live inside the group: with `(...)+` a repeated
    # group only retains its last repetition, i.e. the final character of
    # the ID instead of the whole ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3039
@staticmethod
def _extract_grid_item_renderer(item):
    """Return the first non-empty grid renderer found in *item*.

    The keys ``gridPlaylistRenderer``, ``gridVideoRenderer`` and
    ``gridChannelRenderer`` are probed in that order; None is returned
    when none of them holds a truthy value.
    """
    candidates = (
        item.get('grid%sRenderer' % item_kind)
        for item_kind in ('Playlist', 'Video', 'Channel'))
    return next((renderer for renderer in candidates if renderer), None)
3046
def _grid_entries(self, grid_renderer):
    """Yield entries for the items of a grid renderer.

    Items that are not dicts, or that carry no dict renderer, are
    skipped.  For each remaining renderer an entry is produced for every
    one of ``playlistId``, ``videoId`` and ``channelId`` that is set
    (checked in that order).
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # playlist
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=runs_title)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel
        channel_id = item_renderer.get('channelId')
        if channel_id:
            # Channel renderers keep their title under `simpleText`
            # rather than `runs`
            channel_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
3075
3d3dddc9 3076 def _shelf_entries_from_content(self, shelf_renderer):
3077 content = shelf_renderer.get('content')
3078 if not isinstance(content, dict):
8bdd16b4 3079 return
3d3dddc9 3080 renderer = content.get('gridRenderer')
3081 if renderer:
3082 # TODO: add support for nested playlists so each shelf is processed
3083 # as separate playlist
3084 # TODO: this includes only first N items
3085 for entry in self._grid_entries(renderer):
3086 yield entry
3087 renderer = content.get('horizontalListRenderer')
3088 if renderer:
3089 # TODO
3090 pass
8bdd16b4 3091
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the shelf's own URL (when present) followed by its inline entries.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    ends the generator without yielding anything.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        wants_skip = skip_channels and '/channels?' in shelf_url
        if wants_skip:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3109
8bdd16b4 3110 def _playlist_entries(self, video_list_renderer):
3111 for content in video_list_renderer['contents']:
3112 if not isinstance(content, dict):
3113 continue
3114 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3115 if not isinstance(renderer, dict):
3116 continue
3117 video_id = renderer.get('videoId')
3118 if not video_id:
3119 continue
3120 yield self._extract_video(renderer)
07aeced6 3121
3d3dddc9 3122 r""" # Not needed in the new implementation
3462ffa8 3123 def _itemSection_entries(self, item_sect_renderer):
3124 for content in item_sect_renderer['contents']:
3125 if not isinstance(content, dict):
3126 continue
3127 renderer = content.get('videoRenderer', {})
3128 if not isinstance(renderer, dict):
3129 continue
3130 video_id = renderer.get('videoId')
3131 if not video_id:
3132 continue
3133 yield self._extract_video(renderer)
3d3dddc9 3134 """
3462ffa8 3135
def _rich_entries(self, rich_grid_renderer):
    """Yield the single video wrapped by a rich item, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3143
8bdd16b4 3144 def _video_entry(self, video_renderer):
3145 video_id = video_renderer.get('videoId')
3146 if video_id:
3147 return self._extract_video(video_renderer)
dacb3a86 3148
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) comes first, followed by every inline link
    in the post text that YoutubeIE can handle. An inline link pointing at
    the already yielded attachment is not repeated.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            # Remember what was yielded so the duplicate check below can work
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Label the result with the ID of the linked video itself
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3177
8bdd16b4 3178 def _post_thread_continuation_entries(self, post_thread_continuation):
3179 contents = post_thread_continuation.get('contents')
3180 if not isinstance(contents, list):
3181 return
3182 for content in contents:
3183 renderer = content.get('backstagePostThreadRenderer')
3184 if not isinstance(renderer, dict):
3185 continue
3186 for entry in self._post_thread_entries(renderer):
3187 yield entry
07aeced6 3188
29f7c58a 3189 @staticmethod
3190 def _build_continuation_query(continuation, ctp=None):
3191 query = {
3192 'ctoken': continuation,
3193 'continuation': continuation,
3194 }
3195 if ctp:
3196 query['itct'] = ctp
3197 return query
3198
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query built from ``nextContinuationData``.

    ``None`` is returned when the first entry of ``continuations`` has no
    ``nextContinuationData`` dict or that dict has no continuation token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3210
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if there is none.

    ``nextContinuationData`` is tried first. Otherwise the renderer's
    ``contents`` list is scanned for the first ``continuationItemRenderer``
    whose endpoint carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    contents = renderer.get('contents')
    if not isinstance(contents, list):
        return
    for item in contents:
        if not isinstance(item, dict):
            continue
        endpoint = try_get(
            item, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3233
def _entries(self, tab, identity_token):
    """Yield every entry of *tab*, following continuations page by page.

    tab -- dict holding the tab's renderers under ``content`` and,
           optionally, a ``title``.
    identity_token -- sent as the ``x-youtube-identity-token`` header on
                      continuation requests when truthy.

    The entries embedded in the tab are yielded first. Further pages are
    then requested from ``browse_ajax`` until no continuation is left or a
    response cannot be interpreted. Requests failing with HTTP 500/503 are
    retried up to three times; any other ExtractorError propagates.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Yields the entries below parent_renderer and records the
        # continuation to follow next in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    is_channels_tab = tab.get('title') == 'Channels'
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict)
        if continuation_contents:
            continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
            if continuation_renderer:
                for entry in self._playlist_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('gridContinuation')
            if continuation_renderer:
                for entry in self._grid_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('itemSectionContinuation')
            if continuation_renderer:
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
            if continuation_renderer:
                # Fresh holder so extract_entries reports this page's continuation
                continuation_list = [None]
                for entry in extract_entries(continuation_renderer):
                    yield entry
                continuation = continuation_list[0]
                continue

        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        if continuation_items:
            continuation_item = continuation_items[0]
            if not isinstance(continuation_item, dict):
                # Nothing usable on this page. Restarting the page loop here
                # would re-request the same continuation forever, so stop.
                break
            renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
            if renderer:
                video_list_renderer = {'contents': continuation_items}
                for entry in self._playlist_entries(video_list_renderer):
                    yield entry
                continuation = self._extract_continuation(video_list_renderer)
                continue
        break
9558dcec 3377
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3385
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, ID and URL from the playlist sidebar.

    Returns a dict containing only those of ``uploader``, ``uploader_id``
    and ``uploader_url`` that could be determined (``None`` values are
    dropped); the dict is empty when the sidebar has no owner information.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'],
            compat_str)
        base_url = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3408
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a page rendered as a set of tabs.

    item_id -- fallback playlist ID (and title) when the page metadata
               provides none.
    webpage -- accepted for interface compatibility; not used here.
    data -- the page's initial data dict, source of all metadata.
    tabs -- list of tab dicts; the selected one supplies the entries.
    identity_token -- forwarded to _entries for continuation requests.

    Raises ExtractorError (via _extract_selected_tab) when no tab is
    selected.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Two distinct lists: chained assignment would make both names alias one object
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Both lookups go through try_get: the sidebar is optional, and plain
        # indexing would abort the whole extraction over a missing thumbnail.
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    # Append the selected tab's title to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata on the page: fall back to the sidebar owner
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(selected_tab, identity_token),
        **metadata)
73c4ac2c 3472
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for an inline (watch page) playlist.

    When the playlist advertises its own URL and that URL differs from
    *url*, extraction is delegated to it; otherwise the inline entries are
    returned directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_path = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3490
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs for the alerts in *data*.

    Each renderer stored in an entry of ``data['alerts']`` is inspected.
    Renderers without a ``type`` are skipped. A ``simpleText`` message is
    yielded first, followed by the text of every run.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A renderer that is not a dict has no type to report
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
3508
def _extract_identity_token(self, webpage, item_id):
    """Return the page's ID_TOKEN, or None when it cannot be found.

    The parsed ytcfg is consulted first; a regex search over the raw
    webpage serves as fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3518
def _real_extract(self, url):
    """Extract a tab page, an inline playlist or, as last resort, a video.

    Raises ExtractorError when YouTube reports an error alert, when a
    ``watch`` URL has neither video nor playlist ID, or when the page
    matches none of the recognised layouts.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # NOTE(review): the 'not_channel' group comes from _VALID_URL, which is
    # defined outside this method - presumably it marks non-channel paths.
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    # A watch URL without a video ID: use its playlist if there is one
    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    # Non-error alerts become warnings. Of several error alerts only the
    # last one is raised; the earlier ones are downgraded to warnings.
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() == 'error':
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
    # Layout 1: browse page made of tabs
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    # Layout 2: watch page with an inline playlist
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3578
c5e8d7af 3579
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and URLs carrying a list= parameter, then
    # hands the canonical /playlist URL over to YoutubeTabIE.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Leave every URL YoutubeTabIE accepts to that extractor."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist page, keeping the URL's query if it has one."""
        playlist_id = self._match_id(url)
        parsed_query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        # A bare ID (or query-less URL) gets a minimal list=<id> query
        query = parsed_query or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3654
3655
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also name a playlist (list=...)
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL and delegate to YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3694
3695
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/<id> page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3709
b05654f0 3710
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shorthands, resolved to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3728
3729
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value for the request's 'params' field; subclasses override it
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* videos matching *query*, fetching pages as needed.

        Paging stops when a page cannot be downloaded, holds no result
        sections, or offers no continuation token.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page and continuation pages keep their sections in
            # different places; try both.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Look for the token before the section check below: the
                # entry carrying it has no itemSectionRenderer and would
                # otherwise be skipped, ending the search after one page.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3810
c9ae7b95 3811
# Variant of YoutubeSearchIE registered under the "ytsearchdate" key; it
# differs from its parent only in the attributes overridden below.
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as the request's 'params' field by the inherited _entries
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3817
c9ae7b95 3818
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Runs a search taken from a /results URL instead of a search keyword
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the class's own URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Search for the URL's query, passing its ``sp`` value on as search params."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        # Stored on the instance so the inherited _entries picks it up
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3844
3845
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed's own page."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3866
3867
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shorthand, resolved to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3880
3881
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Accepts the bare youtube.com home page as well as the ':ytrec' /
    # ':ytrecommended' keywords.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 3896
1ed5b5c9 3897
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Keyword forms only: ':ytsub', ':ytsubs', ':ytsubscription',
    # ':ytsubscriptions'.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 3909
1ed5b5c9 3910
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed, selected with ':ythistory'."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
3919
3920
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that carry no video ID.

    The pattern only matches URLs that end right after a lone, ID-less
    query parameter (or after 'watch?' itself); extraction always fails
    with a hint about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .'
        )
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3968
3969
class YoutubeTruncatedIDIE(InfoExtractor):
    """
    Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with a message naming the incomplete ID.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError reporting the short ID."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
8bdd16b4 3985
3986
# Do YouTube show URLs even exist anymore? I couldn't find any.
3988r'''
3989class YoutubeShowIE(YoutubeTabIE):
8bdd16b4 3990 IE_DESC = 'YouTube.com (multi-season) shows'
3991 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3992 IE_NAME = 'youtube:show'
3993 _TESTS = [{
3994 'url': 'https://www.youtube.com/show/airdisasters',
3995 'playlist_mincount': 5,
3996 'info_dict': {
3997 'id': 'airdisasters',
3998 'title': 'Air Disasters',
3999 }
4000 }]
4001
4002 def _real_extract(self, url):
4003 playlist_id = self._match_id(url)
4004 return super(YoutubeShowIE, self)._real_extract(
4005 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3462ffa8 4006'''