]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
More badges
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
2d30521a 34 float_or_none,
4bb4a188 35 get_element_by_id,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
b84071c0 39 parse_count,
7c80519c 40 parse_duration,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
81c2f20b 51 uppercase_escape,
21c340b8 52 url_or_none,
6e6bc8da 53 urlencode_postdata,
8bdd16b4 54 urljoin,
c5e8d7af
PH
55)
56
5f6a1245 57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors.

    Bundles the Google account login flow, the interface-language cookie,
    and helpers for pulling the JSON blobs (ytInitialData, ytcfg) embedded
    in YouTube web pages.
    """
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints posted to, in this order, by _login().
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of site path names.
    # NOTE(review): not referenced inside this class; presumably consumed by
    # URL patterns of subclasses to exclude reserved paths - confirm.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment matching a playlist ID; interpolated into URL patterns.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (includes hl=en)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video ID in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Was a bare ``return``; the documented failure value is False.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST f_req (JSON-encoded) plus the login form fields to url.

            Returns the parsed JSON response, or False when the non-fatal
            download fails.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the body
                # before handing it to the JSON parser.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the
        # challenge request payloads.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Was a bare ``return``; the documented failure value is False.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is evaluated first and then formatted; the
            # previous form applied '%' before the conditional, so any
            # message other than INCORRECT_ANSWER_ENTERED lost its prefix.
            login_error = (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            warn('Unable to login: %s' % login_error)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password error above.
                    tfa_error = (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    warn('Unable to finish TFA: %s' % tfa_error)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved here; tell the user
                # to solve it in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login counts as successful only if the CheckCookie page
        # mentions the account home URL.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with a private copy of ``query``.

        The copy keeps the caller's dict from being shared with the parent
        call. A missing or None ``query`` is treated as an empty dict; the
        previous code raised AttributeError on an explicit ``query=None``.
        """
        kwargs['query'] = (kwargs.get('query') or {}).copy()
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Return the parsed ytInitialData object found in webpage.

        Returns None when neither pattern matches; parsing is non-fatal.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if not config:
            return None
        return self._parse_json(
            uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached. A failed login is not
        fatal here: _login() has already emitted a warning.
        """
        if self._downloader is None:
            return
        self._set_language()
        self._login()

    # Base request body for _call_api(); top-level keys may be overridden
    # by the per-call query.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Patterns locating the JSON objects embedded in watch/browse pages.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # What may follow the terminating ';' of such an object.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST a JSON request to the youtubei/v1 endpoint ``ep``.

        The body is _DEFAULT_API_DATA updated with ``query`` (top-level
        keys only; the class-level default is not mutated). Returns the
        parsed JSON response.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Return the parsed ytInitialData object found in webpage.

        The boundary-anchored pattern is tried first, then the bare one.
        No default is passed to _search_regex or _parse_json here.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ``ytcfg.set(...)`` in webpage.

        Falls back to parsing '{}' when the call is not found; parsing is
        non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict from a video renderer dict.

        Fields that cannot be read from ``renderer`` are left as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip all whitespace, then take the leading digits-and-commas run.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
364
0c148415 365
360e1ca5 366class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 367 IE_DESC = 'YouTube.com'
cb7dfeea 368 _VALID_URL = r"""(?x)^
c5e8d7af 369 (
edb53e2d 370 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 371 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 372 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 373 (?:www\.)?pwnyoutube\.com/|
8b561bfc 374 (?:www\.)?hooktube\.com/|
f7000f3a 375 (?:www\.)?yourepeat\.com/|
e69ae5b9 376 tube\.majestyc\.net/|
ba036333 377 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 378 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 379 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 380 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 381 (?:www\.)?invidious\.kabi\.tk/|
ba036333 382 (?:www\.)?invidious\.13ad\.de/|
791d2e81 383 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 384 (?:www\.)?invidious\.zapashcanon\.fr/|
385 (?:www\.)?invidious\.kavin\.rocks/|
386 (?:www\.)?invidious\.tube/|
387 (?:www\.)?invidiou\.site/|
388 (?:www\.)?invidious\.site/|
389 (?:www\.)?invidious\.xyz/|
494d664e 390 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 391 (?:www\.)?invidious\.drycat\.fr/|
ba036333 392 (?:www\.)?tube\.poal\.co/|
29f7c58a 393 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 394 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 395 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 396 (?:www\.)?yewtu\.be/|
494d664e 397 (?:www\.)?yt\.elukerio\.org/|
894b3826 398 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 399 (?:www\.)?invidious\.ggc-project\.de/|
400 (?:www\.)?yt\.maisputain\.ovh/|
401 (?:www\.)?invidious\.13ad\.de/|
402 (?:www\.)?invidious\.toot\.koeln/|
403 (?:www\.)?invidious\.fdn\.fr/|
404 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 405 (?:www\.)?kgg2m7yk5aybusll\.onion/|
406 (?:www\.)?qklhadlycap4cnod\.onion/|
407 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
408 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
409 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
410 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 411 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 412 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 413 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
414 (?:.*?\#/)? # handle anchor (#/) redirect urls
415 (?: # the various things that can precede the ID:
ac7553d0 416 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 417 |(?: # or the v= param in all its forms
f7000f3a 418 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 419 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 420 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
421 v=
422 )
f4b05232 423 ))
cbaed4bb
S
424 |(?:
425 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
426 vid\.plus| # or vid.plus/xxxx
427 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 428 )/
edb53e2d 429 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 430 )
c5e8d7af 431 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 432 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
433 (?!.*?\blist=
434 (?:
435 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
436 WL # WL are handled by the watch later IE
437 )
438 )
c5e8d7af 439 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 440 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 441 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
442 _PLAYER_INFO_RE = (
443 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
444 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
445 )
2c62dc26 446 _formats = {
c2d3cb4c 447 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
448 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
449 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
450 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
451 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
452 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
454 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 455 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 456 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
457 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
458 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
459 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
460 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
461 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 462 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 463 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
464 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 465
466
467 # 3D videos
c2d3cb4c 468 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
469 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
470 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
471 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 472 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
473 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
474 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 475
96fb5605 476 # Apple HTTP Live Streaming
11f12195 477 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 478 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
480 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
481 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
482 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 483 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
484 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
485
486 # DASH mp4 video
d23028a8
S
487 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
488 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 492 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
493 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
497 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
498 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 499
f6f1fc92 500 # Dash mp4 audio
d23028a8
S
501 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
502 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
503 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
504 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
505 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
506 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
507 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
508
509 # Dash webm
d23028a8
S
510 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
517 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 525 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
526 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
528 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
532
533 # Dash webm audio
d23028a8
S
534 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
535 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 536
0857baad 537 # Dash webm audio with opus inside
d23028a8
S
538 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
539 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
540 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 541
ce6b9a2d
PH
542 # RTMP (unnamed)
543 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
544
545 # av01 video only formats sometimes served with "unknown" codecs
546 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
547 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 550 }
29f7c58a 551 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 552
fd5c4aab
S
553 _GEO_BYPASS = False
554
78caa52a 555 IE_NAME = 'youtube'
2eb88d95
PH
556 _TESTS = [
557 {
2d3d2997 558 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
559 'info_dict': {
560 'id': 'BaW_jenozKc',
561 'ext': 'mp4',
3867038a 562 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
563 'uploader': 'Philipp Hagemeister',
564 'uploader_id': 'phihag',
ec85ded8 565 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
566 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
567 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 568 'upload_date': '20121002',
3867038a 569 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 570 'categories': ['Science & Technology'],
3867038a 571 'tags': ['youtube-dl'],
556dbe7f 572 'duration': 10,
dbdaaa23 573 'view_count': int,
3e7c1224
PH
574 'like_count': int,
575 'dislike_count': int,
7c80519c 576 'start_time': 1,
297a564b 577 'end_time': 9,
2eb88d95 578 }
0e853ca4 579 },
fccd3771 580 {
4bc3a23e
PH
581 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
582 'note': 'Embed-only video (#1746)',
583 'info_dict': {
584 'id': 'yZIXLfi8CZQ',
585 'ext': 'mp4',
586 'upload_date': '20120608',
587 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
588 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
589 'uploader': 'SET India',
94bfcd23 590 'uploader_id': 'setindia',
ec85ded8 591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 592 'age_limit': 18,
fccd3771
PH
593 }
594 },
11b56058 595 {
8bdd16b4 596 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
597 'note': 'Use the first video ID in the URL',
598 'info_dict': {
599 'id': 'BaW_jenozKc',
600 'ext': 'mp4',
3867038a 601 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
602 'uploader': 'Philipp Hagemeister',
603 'uploader_id': 'phihag',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 605 'upload_date': '20121002',
3867038a 606 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 607 'categories': ['Science & Technology'],
3867038a 608 'tags': ['youtube-dl'],
556dbe7f 609 'duration': 10,
dbdaaa23 610 'view_count': int,
11b56058
PM
611 'like_count': int,
612 'dislike_count': int,
34a7de29
S
613 },
614 'params': {
615 'skip_download': True,
616 },
11b56058 617 },
dd27fd17 618 {
2d3d2997 619 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
620 'note': '256k DASH audio (format 141) via DASH manifest',
621 'info_dict': {
622 'id': 'a9LDPn-MO4I',
623 'ext': 'm4a',
624 'upload_date': '20121002',
625 'uploader_id': '8KVIDEO',
ec85ded8 626 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
627 'description': '',
628 'uploader': '8KVIDEO',
629 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 630 },
4bc3a23e
PH
631 'params': {
632 'youtube_include_dash_manifest': True,
633 'format': '141',
4919603f 634 },
de3c7fe0 635 'skip': 'format 141 not served anymore',
dd27fd17 636 },
8bdd16b4 637 # DASH manifest with encrypted signature
638 {
639 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
640 'info_dict': {
641 'id': 'IB3lcPjvWLA',
642 'ext': 'm4a',
643 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
644 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
645 'duration': 244,
646 'uploader': 'AfrojackVEVO',
647 'uploader_id': 'AfrojackVEVO',
648 'upload_date': '20131011',
649 },
650 'params': {
651 'youtube_include_dash_manifest': True,
652 'format': '141/bestaudio[ext=m4a]',
653 },
654 },
aa79ac0c
PH
655 # Controversy video
656 {
657 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
658 'info_dict': {
659 'id': 'T4XJQO3qol8',
660 'ext': 'mp4',
556dbe7f 661 'duration': 219,
aa79ac0c 662 'upload_date': '20100909',
4fe54c12 663 'uploader': 'Amazing Atheist',
aa79ac0c 664 'uploader_id': 'TheAmazingAtheist',
ec85ded8 665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
666 'title': 'Burning Everyone\'s Koran',
667 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
668 }
c522adb1 669 },
dd2d55f1 670 # Normal age-gate video (embed allowed)
c522adb1 671 {
2d3d2997 672 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
673 'info_dict': {
674 'id': 'HtVdAasjOgU',
675 'ext': 'mp4',
676 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 677 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 678 'duration': 142,
c522adb1
JMF
679 'uploader': 'The Witcher',
680 'uploader_id': 'WitcherGame',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 682 'upload_date': '20140605',
34952f09 683 'age_limit': 18,
c522adb1
JMF
684 },
685 },
8bdd16b4 686 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
687 # YouTube Red ad is not captured for creator
688 {
689 'url': '__2ABJjxzNo',
690 'info_dict': {
691 'id': '__2ABJjxzNo',
692 'ext': 'mp4',
693 'duration': 266,
694 'upload_date': '20100430',
695 'uploader_id': 'deadmau5',
696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
697 'creator': 'Dada Life, deadmau5',
698 'description': 'md5:12c56784b8032162bb936a5f76d55360',
699 'uploader': 'deadmau5',
700 'title': 'Deadmau5 - Some Chords (HD)',
701 'alt_title': 'This Machine Kills Some Chords',
702 },
703 'expected_warnings': [
704 'DASH manifest missing',
705 ]
706 },
067aa17e 707 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
708 {
709 'url': 'lqQg6PlCWgI',
710 'info_dict': {
711 'id': 'lqQg6PlCWgI',
712 'ext': 'mp4',
556dbe7f 713 'duration': 6085,
90227264 714 'upload_date': '20150827',
cbe2bd91 715 'uploader_id': 'olympic',
ec85ded8 716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 717 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 718 'uploader': 'Olympic',
cbe2bd91
PH
719 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
720 },
721 'params': {
722 'skip_download': 'requires avconv',
e52a40ab 723 }
cbe2bd91 724 },
6271f1ca
PH
725 # Non-square pixels
726 {
727 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
728 'info_dict': {
729 'id': '_b-2C3KPAM0',
730 'ext': 'mp4',
731 'stretched_ratio': 16 / 9.,
556dbe7f 732 'duration': 85,
6271f1ca
PH
733 'upload_date': '20110310',
734 'uploader_id': 'AllenMeow',
ec85ded8 735 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 736 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 737 'uploader': '孫ᄋᄅ',
6271f1ca
PH
738 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
739 },
06b491eb
S
740 },
741 # url_encoded_fmt_stream_map is empty string
742 {
743 'url': 'qEJwOuvDf7I',
744 'info_dict': {
745 'id': 'qEJwOuvDf7I',
f57b7835 746 'ext': 'webm',
06b491eb
S
747 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
748 'description': '',
749 'upload_date': '20150404',
750 'uploader_id': 'spbelect',
751 'uploader': 'Наблюдатели Петербурга',
752 },
753 'params': {
754 'skip_download': 'requires avconv',
e323cf3f
S
755 },
756 'skip': 'This live event has ended.',
06b491eb 757 },
067aa17e 758 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
759 {
760 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
761 'info_dict': {
762 'id': 'FIl7x6_3R5Y',
eb6793ba 763 'ext': 'webm',
da77d856
S
764 'title': 'md5:7b81415841e02ecd4313668cde88737a',
765 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 766 'duration': 220,
da77d856
S
767 'upload_date': '20150625',
768 'uploader_id': 'dorappi2000',
ec85ded8 769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 770 'uploader': 'dorappi2000',
eb6793ba 771 'formats': 'mincount:31',
da77d856 772 },
eb6793ba 773 'skip': 'not actual anymore',
2ee8f5d8 774 },
8a1a26ce
YCH
775 # DASH manifest with segment_list
776 {
777 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
778 'md5': '8ce563a1d667b599d21064e982ab9e31',
779 'info_dict': {
780 'id': 'CsmdDsKjzN8',
781 'ext': 'mp4',
17ee98e1 782 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
783 'uploader': 'Airtek',
784 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
785 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
786 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
787 },
788 'params': {
789 'youtube_include_dash_manifest': True,
790 'format': '135', # bestvideo
be49068d
S
791 },
792 'skip': 'This live event has ended.',
2ee8f5d8 793 },
cf7e015f
S
794 {
795 # Multifeed videos (multiple cameras), URL is for Main Camera
796 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
797 'info_dict': {
798 'id': 'jqWvoWXjCVs',
799 'title': 'teamPGP: Rocket League Noob Stream',
800 'description': 'md5:dc7872fb300e143831327f1bae3af010',
801 },
802 'playlist': [{
803 'info_dict': {
804 'id': 'jqWvoWXjCVs',
805 'ext': 'mp4',
806 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
807 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 808 'duration': 7335,
cf7e015f
S
809 'upload_date': '20150721',
810 'uploader': 'Beer Games Beer',
811 'uploader_id': 'beergamesbeer',
ec85ded8 812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 813 'license': 'Standard YouTube License',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
817 'id': '6h8e8xoXJzg',
818 'ext': 'mp4',
819 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
820 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 821 'duration': 7337,
cf7e015f
S
822 'upload_date': '20150721',
823 'uploader': 'Beer Games Beer',
824 'uploader_id': 'beergamesbeer',
ec85ded8 825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 826 'license': 'Standard YouTube License',
cf7e015f
S
827 },
828 }, {
829 'info_dict': {
830 'id': 'PUOgX5z9xZw',
831 'ext': 'mp4',
832 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
833 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 834 'duration': 7337,
cf7e015f
S
835 'upload_date': '20150721',
836 'uploader': 'Beer Games Beer',
837 'uploader_id': 'beergamesbeer',
ec85ded8 838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 839 'license': 'Standard YouTube License',
cf7e015f
S
840 },
841 }, {
842 'info_dict': {
843 'id': 'teuwxikvS5k',
844 'ext': 'mp4',
845 'title': 'teamPGP: Rocket League Noob Stream (zim)',
846 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 847 'duration': 7334,
cf7e015f
S
848 'upload_date': '20150721',
849 'uploader': 'Beer Games Beer',
850 'uploader_id': 'beergamesbeer',
ec85ded8 851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 852 'license': 'Standard YouTube License',
cf7e015f
S
853 },
854 }],
855 'params': {
856 'skip_download': True,
857 },
4fe54c12 858 'skip': 'This video is not available.',
cbaed4bb 859 },
f9f49d87 860 {
067aa17e 861 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
862 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
863 'info_dict': {
864 'id': 'gVfLd0zydlo',
865 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
866 },
867 'playlist_count': 2,
be49068d 868 'skip': 'Not multifeed anymore',
f9f49d87 869 },
cbaed4bb 870 {
2d3d2997 871 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 872 'only_matching': True,
0e49d9a6 873 },
6d4fc66b 874 {
2d3d2997 875 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
876 'only_matching': True,
877 },
0e49d9a6 878 {
067aa17e 879 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 880 # Also tests cut-off URL expansion in video description (see
067aa17e
S
881 # https://github.com/ytdl-org/youtube-dl/issues/1892,
882 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
883 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
884 'info_dict': {
885 'id': 'lsguqyKfVQg',
886 'ext': 'mp4',
887 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 888 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 889 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 890 'duration': 133,
0e49d9a6
LL
891 'upload_date': '20151119',
892 'uploader_id': 'IronSoulElf',
ec85ded8 893 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 894 'uploader': 'IronSoulElf',
eb6793ba
S
895 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
896 'track': 'Dark Walk - Position Music',
897 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 898 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
899 },
900 'params': {
901 'skip_download': True,
902 },
903 },
61f92af1 904 {
067aa17e 905 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
906 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
907 'only_matching': True,
908 },
313dfc45
LL
909 {
910 # Video with yt:stretch=17:0
911 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
912 'info_dict': {
913 'id': 'Q39EVAstoRM',
914 'ext': 'mp4',
915 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
916 'description': 'md5:ee18a25c350637c8faff806845bddee9',
917 'upload_date': '20151107',
918 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
919 'uploader': 'CH GAMER DROID',
920 },
921 'params': {
922 'skip_download': True,
923 },
be49068d 924 'skip': 'This video does not exist.',
313dfc45 925 },
7caf9830
S
926 {
927 # Video licensed under Creative Commons
928 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
929 'info_dict': {
930 'id': 'M4gD1WSo5mA',
931 'ext': 'mp4',
932 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
933 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 934 'duration': 721,
7caf9830
S
935 'upload_date': '20150127',
936 'uploader_id': 'BerkmanCenter',
ec85ded8 937 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 938 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
939 'license': 'Creative Commons Attribution license (reuse allowed)',
940 },
941 'params': {
942 'skip_download': True,
943 },
944 },
fd050249
S
945 {
946 # Channel-like uploader_url
947 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
948 'info_dict': {
949 'id': 'eQcmzGIKrzg',
950 'ext': 'mp4',
951 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
952 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 953 'duration': 4060,
fd050249 954 'upload_date': '20151119',
eb6793ba 955 'uploader': 'Bernie Sanders',
fd050249 956 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
958 'license': 'Creative Commons Attribution license (reuse allowed)',
959 },
960 'params': {
961 'skip_download': True,
962 },
963 },
040ac686
S
964 {
965 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
966 'only_matching': True,
7f29cf54
S
967 },
968 {
067aa17e 969 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
970 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
971 'only_matching': True,
6496ccb4
S
972 },
973 {
974 # Rental video preview
975 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
976 'info_dict': {
977 'id': 'uGpuVWrhIzE',
978 'ext': 'mp4',
979 'title': 'Piku - Trailer',
980 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
981 'upload_date': '20150811',
982 'uploader': 'FlixMatrix',
983 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
985 'license': 'Standard YouTube License',
986 },
987 'params': {
988 'skip_download': True,
989 },
eb6793ba 990 'skip': 'This video is not available.',
022a5d66 991 },
12afdc2a
S
992 {
993 # YouTube Red video with episode data
994 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
995 'info_dict': {
996 'id': 'iqKdEhx-dD4',
997 'ext': 'mp4',
998 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 999 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1000 'duration': 2085,
12afdc2a
S
1001 'upload_date': '20170118',
1002 'uploader': 'Vsauce',
1003 'uploader_id': 'Vsauce',
1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1005 'series': 'Mind Field',
1006 'season_number': 1,
1007 'episode_number': 1,
1008 },
1009 'params': {
1010 'skip_download': True,
1011 },
1012 'expected_warnings': [
1013 'Skipping DASH manifest',
1014 ],
1015 },
c7121fa7
S
1016 {
1017 # The following content has been identified by the YouTube community
1018 # as inappropriate or offensive to some audiences.
1019 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1020 'info_dict': {
1021 'id': '6SJNVb0GnPI',
1022 'ext': 'mp4',
1023 'title': 'Race Differences in Intelligence',
1024 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1025 'duration': 965,
1026 'upload_date': '20140124',
1027 'uploader': 'New Century Foundation',
1028 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1030 },
1031 'params': {
1032 'skip_download': True,
1033 },
1034 },
022a5d66
S
1035 {
1036 # itag 212
1037 'url': '1t24XAntNCY',
1038 'only_matching': True,
fd5c4aab
S
1039 },
1040 {
1041 # geo restricted to JP
1042 'url': 'sJL6WA-aGkQ',
1043 'only_matching': True,
1044 },
cd5a74a2
S
1045 {
1046 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1047 'only_matching': True,
1048 },
825cd268
RA
1049 {
1050 # DRM protected
1051 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1052 'only_matching': True,
4fe54c12
S
1053 },
1054 {
1055 # Video with unsupported adaptive stream type formats
1056 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1057 'info_dict': {
1058 'id': 'Z4Vy8R84T1U',
1059 'ext': 'mp4',
1060 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1061 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1062 'duration': 433,
1063 'upload_date': '20130923',
1064 'uploader': 'Amelia Putri Harwita',
1065 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1066 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1067 'formats': 'maxcount:10',
1068 },
1069 'params': {
1070 'skip_download': True,
1071 'youtube_include_dash_manifest': False,
1072 },
5429d6a9 1073 'skip': 'not actual anymore',
5caabd3c 1074 },
1075 {
822b9d9c 1076 # Youtube Music Auto-generated description
5caabd3c 1077 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1078 'info_dict': {
1079 'id': 'MgNrAu2pzNs',
1080 'ext': 'mp4',
1081 'title': 'Voyeur Girl',
1082 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1083 'upload_date': '20190312',
5429d6a9
S
1084 'uploader': 'Stephen - Topic',
1085 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1086 'artist': 'Stephen',
1087 'track': 'Voyeur Girl',
1088 'album': 'it\'s too much love to know my dear',
1089 'release_date': '20190313',
1090 'release_year': 2019,
1091 },
1092 'params': {
1093 'skip_download': True,
1094 },
1095 },
66b48727
RA
1096 {
1097 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1098 'only_matching': True,
1099 },
011e75e6
S
1100 {
1101 # invalid -> valid video id redirection
1102 'url': 'DJztXj2GPfl',
1103 'info_dict': {
1104 'id': 'DJztXj2GPfk',
1105 'ext': 'mp4',
1106 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1107 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1108 'upload_date': '20090125',
1109 'uploader': 'Prochorowka',
1110 'uploader_id': 'Prochorowka',
1111 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1112 'artist': 'Panjabi MC',
1113 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1114 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1115 },
1116 'params': {
1117 'skip_download': True,
1118 },
ea74e00b
DP
1119 },
1120 {
1121 # empty description results in an empty string
1122 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1123 'info_dict': {
1124 'id': 'x41yOUIvK2k',
1125 'ext': 'mp4',
1126 'title': 'IMG 3456',
1127 'description': '',
1128 'upload_date': '20170613',
1129 'uploader_id': 'ElevageOrVert',
1130 'uploader': 'ElevageOrVert',
1131 },
1132 'params': {
1133 'skip_download': True,
1134 },
1135 },
a0566bbf 1136 {
29f7c58a 1137 # with '};' inside yt initial data (see [1])
1138 # see [2] for an example with '};' inside ytInitialPlayerResponse
1139 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1140 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1141 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1142 'info_dict': {
1143 'id': 'CHqg6qOn4no',
1144 'ext': 'mp4',
1145 'title': 'Part 77 Sort a list of simple types in c#',
1146 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1147 'upload_date': '20130831',
1148 'uploader_id': 'kudvenkat',
1149 'uploader': 'kudvenkat',
1150 },
1151 'params': {
1152 'skip_download': True,
1153 },
1154 },
29f7c58a 1155 {
1156 # another example of '};' in ytInitialData
1157 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1158 'only_matching': True,
1159 },
1160 {
1161 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1162 'only_matching': True,
1163 },
2eb88d95
PH
1164 ]
1165
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with an empty cache of signature functions."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
e0df6211 1169
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user the video info webpage of *video_id* is being fetched."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1173
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that information for *video_id* is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1177
def report_unavailable_format(self, video_id, format):
    """Tell the user that *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1181
def report_rtmp_download(self):
    """Tell the user that the download is going to use RTMP."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1185
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Summarise a signature as the dot-joined lengths of its dot-separated parts."""
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(compat_str(length) for length in part_lengths)
60064c53 1189
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``(ext, id)`` pair parsed out of *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    first one that matches is used.  Raises ExtractorError when none of
    them matches.
    """
    # Generator keeps the search lazy: patterns after the first hit are
    # never evaluated.
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('ext'), id_m.group('id')
1199
1200 def _extract_signature_function(self, video_id, player_url, example_sig):
1201 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1202
c4417ddb 1203 # Read from filesystem cache
60064c53
PH
1204 func_id = '%s_%s_%s' % (
1205 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1206 assert os.path.basename(func_id) == func_id
a0e07d31 1207
69ea8ca4 1208 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1209 if cache_spec is not None:
78caa52a 1210 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1211
6d1a55a5
PH
1212 download_note = (
1213 'Downloading player %s' % player_url
1214 if self._downloader.params.get('verbose') else
1215 'Downloading %s player %s' % (player_type, player_id)
1216 )
e0df6211
PH
1217 if player_type == 'js':
1218 code = self._download_webpage(
1219 player_url, video_id,
6d1a55a5 1220 note=download_note,
69ea8ca4 1221 errnote='Download of %s failed' % player_url)
83799698 1222 res = self._parse_sig_js(code)
c4417ddb 1223 elif player_type == 'swf':
e0df6211
PH
1224 urlh = self._request_webpage(
1225 player_url, video_id,
6d1a55a5 1226 note=download_note,
69ea8ca4 1227 errnote='Download of %s failed' % player_url)
e0df6211 1228 code = urlh.read()
83799698 1229 res = self._parse_sig_swf(code)
e0df6211
PH
1230 else:
1231 assert False, 'Invalid player type %r' % player_type
1232
785521bf
PH
1233 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1234 cache_res = res(test_string)
1235 cache_spec = [ord(c) for c in cache_res]
83799698 1236
69ea8ca4 1237 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1238 return res
1239
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run on a probe string as long as *example_sig* whose
    characters are their own positions; the resulting index sequence is
    rendered as a concatenation of ``s[i]`` items and ``s[a:b:c]`` slices
    and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yield one 's[...]' expression per single index or per run of
        # consecutive indices (step +1 or -1) found in idxs.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as slice syntax; the
            # stop bound is end + step, left open when that would be
            # negative.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride holds,
                # otherwise emit the finished run.
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): when idxs has fewer than two items the loop body
        # never runs and `i` is unbound here.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1278
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from JS player source.

    The name of the entry function is located with the regexes below
    (tried in order) and the function is then extracted from *jscode*
    with JSInterpreter.  The returned callable takes the scrambled
    signature string and returns whatever the JS function returns for it.
    """
    sig_function_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # The two entries below are identical; both are kept as found.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_function_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return initial_function([s])

    return decipher
1299
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from SWF player bytes.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped so that it accepts the signature
    string directly.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return initial_function([s])

    return decipher
1306
83799698 1307 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1308 """Turn the encrypted s field into a working signature"""
6b37f0be 1309
c8bf86d5 1310 if player_url is None:
69ea8ca4 1311 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1312
69ea8ca4 1313 if player_url.startswith('//'):
78caa52a 1314 player_url = 'https:' + player_url
3c90cc8b
S
1315 elif not re.match(r'https?://', player_url):
1316 player_url = compat_urlparse.urljoin(
1317 'https://www.youtube.com', player_url)
c8bf86d5 1318 try:
62af3a0e 1319 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1320 if player_id not in self._player_cache:
1321 func = self._extract_signature_function(
60064c53 1322 video_id, player_url, s
c8bf86d5
PH
1323 )
1324 self._player_cache[player_id] = func
1325 func = self._player_cache[player_id]
1326 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1327 self._print_sig_code(func, s)
c8bf86d5
PH
1328 return func(s)
1329 except Exception as e:
1330 tb = traceback.format_exc()
1331 raise ExtractorError(
78caa52a 1332 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1333
def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
    """Return the subtitle tracks of *video_id*, keyed by language code.

    The track list is fetched from the timedtext endpoint; for every
    language (first track wins) one entry per format in
    ``self._SUBTITLE_FORMATS`` is produced.  When *has_live_chat_replay*
    is true a 'live_chat' pseudo-track is added.  An empty dict is
    returned, with a warning, when the list cannot be downloaded or
    nothing was found.  *webpage* is accepted but not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if has_live_chat_replay:
        live_chat_entry = {
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat_replay',
        }
        subtitles['live_chat'] = [live_chat_entry]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1373
a72778d3
S
1374 def _get_ytplayer_config(self, video_id, webpage):
1375 patterns = (
526b3b07
S
1376 # User data may contain arbitrary character sequences that may affect
1377 # JSON extraction with regex, e.g. when '};' is contained the second
1378 # regex won't capture the whole JSON. Yet working around by trying more
1379 # concrete regex first keeping in mind proper quoted string handling
1380 # to be implemented in future that will replace this workaround (see
067aa17e
S
1381 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1382 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1383 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1384 r';ytplayer\.config\s*=\s*({.+?});',
1385 )
1386 config = self._search_regex(
1387 patterns, webpage, 'ytplayer.config', default=None)
1388 if config:
1389 return self._parse_json(
1390 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1391
def _get_automatic_captions(self, video_id, player_response, player_config):
    """Return the automatic caption tracks of *video_id*.

    Three sources are tried in order:

    1. a ``ttsurl`` entry in ``player_config['args']`` (track list is
       downloaded from it),
    2. the ``playerCaptionsTracklistRenderer`` of *player_response*,
    3. the legacy ``caption_tracks`` / ``caption_translation_languages``
       entries of ``player_config['args']``.

    The result maps a language code to a list of ``{'url', 'ext'}`` dicts,
    one per format in ``self._SUBTITLE_FORMATS``.  An empty dict is
    returned, with a warning, when neither input is given or no captions
    can be found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not (player_response or player_config):
        self._downloader.report_warning(err_msg)
        return {}
    try:
        # A config without an 'args' entry (or with a null one) must
        # behave like an empty one; otherwise args.get() below raises an
        # AttributeError that the except clause does not cover.
        args = (player_config.get('args') if player_config else None) or {}
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Derive one URL per (language, format) from sub_url by
            # overriding its 'tlang' and 'fmt' query parameters.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        if player_response:
            renderer = player_response['captions']['playerCaptionsTracklistRenderer']
            base_url = renderer['captionTracks'][0]['baseUrl']
            sub_lang_list = []
            for lang in renderer['translationLanguages']:
                lang_code = lang.get('languageCode')
                if lang_code:
                    sub_lang_list.append(lang_code)
            return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1488
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video is marked as watched.

    The URL is taken from *player_response* or, failing that, from
    *video_info*; nothing happens when neither provides a valid one.  The
    request itself is non-fatal.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1514
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in the HTML text *webpage*.

    Three sources are scanned, in this order:
      * quoted player URLs (``youtube.com`` / ``youtube-nocookie.com`` with an
        ``embed``, ``v`` or ``p`` path and an 11-character ID) that follow an
        iframe/embed ``src=``, ``data-video-url=``, ``embedSWF(``,
        object ``data=`` or ``new SWFObject(``;
      * ``data-youtube-id`` values of ``class="lazyYT"`` elements;
      * ``data-video_id`` values of ``yvii_single_video_player`` divs.

    Entries from the first two sources are HTML-unescaped.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*``, which
    # demanded a literal ':' and so never matched ``embedSWF("...")``.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one.
    entries.extend(m[-1] for m in matches)

    return entries
1546
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None when it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1551
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the text captured by group 2 of ``cls._VALID_URL`` for *url*.

    The pattern is applied with ``re.VERBOSE`` and anchored at the start of
    *url*.  Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1559
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build a chapter list from the chaptered player bar in the page's initial data.

    Returns None when *webpage* is empty, when the initial data is missing or
    not a dict, or when no chapter list is found.  Otherwise returns a list of
    dicts with ``start_time``, ``end_time`` and ``title``.  A chapter ends
    where the next entry starts; the last one ends at *duration*.  Entries
    whose start or end cannot be determined are left out.
    """
    if not webpage:
        return
    initial_data = self._extract_yt_initial_data(video_id, webpage)
    if not (initial_data and isinstance(initial_data, dict)):
        return
    raw_chapters = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(entry):
        # timeRangeStartMillis is divided by 1000 via scale=1000.
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, entry in enumerate(raw_chapters):
        begin = start_of(entry)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_of(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        name = try_get(
            entry, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': name,
        })
    return result
1604
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Derive chapters from ``seekTo`` timestamp links in an HTML description.

    Every ``<br />``-separated line containing an ``<a ... onclick="yt.www.
    watch.player.seekTo...">MM:SS</a>`` link is treated as a chapter start.
    A chapter ends where the next one starts; the last one ends at
    *duration*.

    Returns None when *description* is empty or has no such lines, otherwise
    a list of dicts with ``start_time``, ``end_time`` and ``title``.

    *duration* may be None: the clamping against it is then skipped and the
    last chapter (whose end would be unknown) is left out, instead of raising
    TypeError on Python 3 when comparing a number with None.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        # A timestamp beyond the video's end stops the scan.
        if duration is not None and start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration is not None and end_time > duration:
            end_time = duration
        # Timestamps going backwards stop the scan as well.
        if start_time > end_time:
            break
        # The title is the line with the timestamp link removed.
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1639
84213ea8
S
1640 def _extract_chapters(self, webpage, description, video_id, duration):
1641 return (self._extract_chapters_from_json(webpage, video_id, duration)
1642 or self._extract_chapters_from_description(description, duration))
1643
c5e8d7af 1644 def _real_extract(self, url):
cf7e015f
S
1645 url, smuggled_data = unsmuggle_url(url, {})
1646
7e8c0af0 1647 proto = (
78caa52a
PH
1648 'http' if self._downloader.params.get('prefer_insecure', False)
1649 else 'https')
7e8c0af0 1650
7c80519c 1651 start_time = None
297a564b 1652 end_time = None
7c80519c
JMF
1653 parsed_url = compat_urllib_parse_urlparse(url)
1654 for component in [parsed_url.fragment, parsed_url.query]:
1655 query = compat_parse_qs(component)
297a564b 1656 if start_time is None and 't' in query:
7c80519c 1657 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1658 if start_time is None and 'start' in query:
1659 start_time = parse_duration(query['start'][0])
297a564b
JMF
1660 if end_time is None and 'end' in query:
1661 end_time = parse_duration(query['end'][0])
7c80519c 1662
c5e8d7af
PH
1663 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1664 mobj = re.search(self._NEXT_URL_RE, url)
1665 if mobj:
7fd002c0 1666 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1667 video_id = self.extract_id(url)
c5e8d7af
PH
1668
1669 # Get video webpage
aa79ac0c 1670 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1671 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1672
1673 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1674 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1675
1676 # Attempt to extract SWF player URL
e0df6211 1677 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1678 if mobj is not None:
1679 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1680 else:
1681 player_url = None
1682
d8d24a92
S
1683 dash_mpds = []
1684
1685 def add_dash_mpd(video_info):
1686 dash_mpd = video_info.get('dashmpd')
1687 if dash_mpd and dash_mpd[0] not in dash_mpds:
1688 dash_mpds.append(dash_mpd[0])
1689
561b456e
S
1690 def add_dash_mpd_pr(pl_response):
1691 dash_mpd = url_or_none(try_get(
1692 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1693 compat_str))
1694 if dash_mpd and dash_mpd not in dash_mpds:
1695 dash_mpds.append(dash_mpd)
1696
c7121fa7
S
1697 is_live = None
1698 view_count = None
1699
1700 def extract_view_count(v_info):
1701 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1702
c2d125d9
S
1703 def extract_player_response(player_response, video_id):
1704 pl_response = str_or_none(player_response)
1705 if not pl_response:
1706 return
1707 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1708 if isinstance(pl_response, dict):
1709 add_dash_mpd_pr(pl_response)
1710 return pl_response
1711
fb2c9277
U
1712 def extract_embedded_config(embed_webpage, video_id):
1713 embedded_config = self._search_regex(
1714 r'setConfig\(({.*})\);',
1715 embed_webpage, 'ytInitialData', default=None)
1716 if embedded_config:
1717 return embedded_config
1718
62d80ba1 1719 video_info = {}
dbdaaa23 1720 player_response = {}
62d80ba1 1721 ytplayer_config = None
1722 embed_webpage = None
dbdaaa23 1723
c5e8d7af 1724 # Get video info
39e7107d
U
1725 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1726 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1727 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1728 age_gate = True
1729 # We simulate the access to the video from www.youtube.com/v/{video_id}
1730 # this can be viewed without login into Youtube
beb95e77
CL
1731 url = proto + '://www.youtube.com/embed/%s' % video_id
1732 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
fb2c9277
U
1733 ext = extract_embedded_config(embed_webpage, video_id)
1734 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1735 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1736 if not playable_in_embed:
1737 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1738 playable_in_embed = ''
1739 else:
1740 playable_in_embed = playable_in_embed.group('playableinEmbed')
1741 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1742 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1743 if playable_in_embed == 'false':
c73baf23
U
1744 '''
1745 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1746 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1747 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1748 '''
1749 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1750 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1751 age_gate = False
1752 # Try looking directly into the video webpage
1753 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1754 if ytplayer_config:
59c5fa91
PO
1755 args = ytplayer_config.get("args")
1756 if args is not None:
1757 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1758 # Convert to the same format returned by compat_parse_qs
1759 video_info = dict((k, [v]) for k, v in args.items())
1760 add_dash_mpd(video_info)
1761 # Rental video is not rented but preview is available (e.g.
1762 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1763 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1764 if not video_info and args.get('ypc_vid'):
1765 return self.url_result(
1766 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1767 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1768 is_live = True
1769 if not player_response:
1770 player_response = extract_player_response(args.get('player_response'), video_id)
1771 elif not player_response:
1772 player_response = ytplayer_config
4bb9c880
U
1773 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1774 add_dash_mpd_pr(player_response)
9d9314cb
U
1775 else:
1776 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1777 else:
1778 data = compat_urllib_parse_urlencode({
1779 'video_id': video_id,
1780 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1781 'sts': self._search_regex(
1782 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1783 })
1784 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1785 try:
1786 video_info_webpage = self._download_webpage(
1787 video_info_url, video_id,
1788 note='Refetching age-gated info webpage',
1789 errnote='unable to download video info webpage')
1790 except ExtractorError:
1791 video_info_webpage = None
1792 if video_info_webpage:
1793 video_info = compat_parse_qs(video_info_webpage)
1794 pl_response = video_info.get('player_response', [None])[0]
1795 player_response = extract_player_response(pl_response, video_id)
1796 add_dash_mpd(video_info)
1797 view_count = extract_view_count(video_info)
c108eb73
JMF
1798 else:
1799 age_gate = False
d8d24a92 1800 # Try looking directly into the video webpage
a72778d3 1801 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
8bdd16b4 1802 if ytplayer_config:
1803 args = ytplayer_config.get('args', {})
4c76aa06 1804 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1805 # Convert to the same format returned by compat_parse_qs
1806 video_info = dict((k, [v]) for k, v in args.items())
1807 add_dash_mpd(video_info)
6496ccb4
S
1808 # Rental video is not rented but preview is available (e.g.
1809 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1810 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1811 if not video_info and args.get('ypc_vid'):
1812 return self.url_result(
1813 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1814 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1815 is_live = True
dbdaaa23 1816 if not player_response:
c2d125d9 1817 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1818 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1819 add_dash_mpd_pr(player_response)
bbb7c3f7 1820
8bdd16b4 1821 if not video_info and not player_response:
1822 player_response = extract_player_response(
1823 self._search_regex(
29f7c58a 1824 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1825 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
8bdd16b4 1826 'initial player response', default='{}'),
1827 video_id)
1828
bbb7c3f7 1829 def extract_unavailable_message():
0add33ab
S
1830 messages = []
1831 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1832 msg = self._html_search_regex(
1833 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1834 video_webpage, 'unavailable %s' % kind, default=None)
1835 if msg:
1836 messages.append(msg)
1837 if messages:
1838 return '\n'.join(messages)
bbb7c3f7 1839
f93abcf1 1840 if not video_info and not player_response:
15be3eb5
RA
1841 unavailable_message = extract_unavailable_message()
1842 if not unavailable_message:
1843 unavailable_message = 'Unable to extract video data'
1844 raise ExtractorError(
1845 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1846
f93abcf1
S
1847 if not isinstance(video_info, dict):
1848 video_info = {}
1849
5ac23244 1850 playable_in_embed = try_get(
1851 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1852
dbdaaa23
S
1853 video_details = try_get(
1854 player_response, lambda x: x['videoDetails'], dict) or {}
1855
37357d21
S
1856 microformat = try_get(
1857 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1858
8dbf751a
RA
1859 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1860 if not video_title:
cf7e015f
S
1861 self._downloader.report_warning('Unable to extract video title')
1862 video_title = '_'
1863
9cafc3fd 1864 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1865 if video_description:
fa4bc6e7
RA
1866
1867 def replace_url(m):
1868 redir_url = compat_urlparse.urljoin(url, m.group(1))
1869 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1870 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1871 qs = compat_parse_qs(parsed_redir_url.query)
1872 q = qs.get('q')
1873 if q and q[0]:
1874 return q[0]
1875 return redir_url
1876
9cafc3fd 1877 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1878 <a\s+
25cb7a0e 1879 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1880 (?:title|href)="([^"]+)"\s+
25cb7a0e 1881 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1882 class="[^"]*"[^>]*>
23f13e97 1883 [^<]+\.{3}\s*
cf7e015f 1884 </a>
fa4bc6e7 1885 ''', replace_url, video_description)
cf7e015f
S
1886 video_description = clean_html(video_description)
1887 else:
ea74e00b
DP
1888 video_description = video_details.get('shortDescription')
1889 if video_description is None:
1890 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 1891
8fe10494 1892 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1893 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1894 multifeed_metadata_list = try_get(
1895 player_response,
1896 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1897 compat_str) or try_get(
1898 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1899 if multifeed_metadata_list:
1900 entries = []
1901 feed_ids = []
1902 for feed in multifeed_metadata_list.split(','):
1903 # Unquote should take place before split on comma (,) since textual
1904 # fields may contain comma as well (see
067aa17e 1905 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1906 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1907
1908 def feed_entry(name):
1909 return try_get(feed_data, lambda x: x[name][0], compat_str)
1910
1911 feed_id = feed_entry('id')
1912 if not feed_id:
1913 continue
1914 feed_title = feed_entry('title')
1915 title = video_title
1916 if feed_title:
1917 title += ' (%s)' % feed_title
8fe10494
S
1918 entries.append({
1919 '_type': 'url_transparent',
1920 'ie_key': 'Youtube',
1921 'url': smuggle_url(
1922 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1923 {'force_singlefeed': True}),
6b09401b 1924 'title': title,
8fe10494 1925 })
6b09401b 1926 feed_ids.append(feed_id)
8fe10494
S
1927 self.to_screen(
1928 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1929 % (', '.join(feed_ids), video_id))
1930 return self.playlist_result(entries, video_id, video_title, video_description)
1931 else:
1932 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1933
c7121fa7 1934 if view_count is None:
1c9c8de2 1935 view_count = extract_view_count(video_info)
dbdaaa23
S
1936 if view_count is None and video_details:
1937 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
1938 if view_count is None and microformat:
1939 view_count = int_or_none(microformat.get('viewCount'))
1d699755 1940
27019dbb 1941 if is_live is None:
898238e9 1942 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1943
321bf820 1944 has_live_chat_replay = False
f0f76a33 1945 if not is_live:
321bf820 1946 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1947 try:
1948 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1949 has_live_chat_replay = True
f0f76a33 1950 except (KeyError, IndexError, TypeError):
321bf820 1951 pass
1952
c5e8d7af
PH
1953 # Check for "rental" videos
1954 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1955 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1956
c63ca0ee
S
1957 def _extract_filesize(media_url):
1958 return int_or_none(self._search_regex(
1959 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1960
bf1317d2
S
1961 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1962 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1963
c5e8d7af
PH
1964 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1965 self.report_rtmp_download()
dd27fd17
PH
1966 formats = [{
1967 'format_id': '_rtmp',
1968 'protocol': 'rtmp',
1969 'url': video_info['conn'][0],
1970 'player_url': player_url,
1971 }]
bf1317d2 1972 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1973 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1974 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1975 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1976 formats = []
3318832e 1977 formats_spec = {}
82156fdb 1978 fmt_list = video_info.get('fmt_list', [''])[0]
1979 if fmt_list:
1980 for fmt in fmt_list.split(','):
1981 spec = fmt.split('/')
3318832e 1982 if len(spec) > 1:
1983 width_height = spec[1].split('x')
1984 if len(width_height) == 2:
1985 formats_spec[spec[0]] = {
1986 'resolution': spec[1],
1987 'width': int_or_none(width_height[0]),
1988 'height': int_or_none(width_height[1]),
1989 }
bf1317d2
S
1990 for fmt in streaming_formats:
1991 itag = str_or_none(fmt.get('itag'))
1992 if not itag:
201e9eaa 1993 continue
bf1317d2
S
1994 quality = fmt.get('quality')
1995 quality_label = fmt.get('qualityLabel') or quality
1996 formats_spec[itag] = {
1997 'asr': int_or_none(fmt.get('audioSampleRate')),
1998 'filesize': int_or_none(fmt.get('contentLength')),
1999 'format_note': quality_label,
2000 'fps': int_or_none(fmt.get('fps')),
2001 'height': int_or_none(fmt.get('height')),
bf1317d2
S
2002 # bitrate for itag 43 is always 2147483647
2003 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2004 'width': int_or_none(fmt.get('width')),
2005 }
2006
2007 for fmt in streaming_formats:
00eb865b 2008 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2009 continue
2010 url = url_or_none(fmt.get('url'))
2011
2012 if not url:
fa3db383 2013 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2014 if not cipher:
2015 continue
2016 url_data = compat_parse_qs(cipher)
2017 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2018 if not url:
2019 continue
2020 else:
2021 cipher = None
2022 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2023
2f483bc1
S
2024 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2025 # Unsupported FORMAT_STREAM_TYPE_OTF
2026 if stream_type == 3:
2027 continue
6449cd80 2028
bf1317d2
S
2029 format_id = fmt.get('itag') or url_data['itag'][0]
2030 if not format_id:
2031 continue
2032 format_id = compat_str(format_id)
a49eccdf 2033
bf1317d2
S
2034 if cipher:
2035 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
8bdd16b4 2036 ASSETS_RE = (
2037 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2038 r'"jsUrl"\s*:\s*("[^"]+")',
2039 r'"assets":.+?"js":\s*("[^"]+")')
bf1317d2
S
2040 jsplayer_url_json = self._search_regex(
2041 ASSETS_RE,
2042 embed_webpage if age_gate else video_webpage,
2043 'JS player URL (1)', default=None)
2044 if not jsplayer_url_json and not age_gate:
2045 # We need the embed website after all
2046 if embed_webpage is None:
2047 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2048 embed_webpage = self._download_webpage(
2049 embed_url, video_id, 'Downloading embed webpage')
2050 jsplayer_url_json = self._search_regex(
2051 ASSETS_RE, embed_webpage, 'JS player URL')
2052
2053 player_url = json.loads(jsplayer_url_json)
cf010131 2054 if player_url is None:
bf1317d2
S
2055 player_url_json = self._search_regex(
2056 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2057 video_webpage, 'age gate player URL')
2058 player_url = json.loads(player_url_json)
2059
2060 if 'sig' in url_data:
2061 url += '&signature=' + url_data['sig'][0]
2062 elif 's' in url_data:
2063 encrypted_sig = url_data['s'][0]
2064
2065 if self._downloader.params.get('verbose'):
2066 if player_url is None:
bf1317d2 2067 player_desc = 'unknown'
cf010131 2068 else:
e40c758c
S
2069 player_type, player_version = self._extract_player_info(player_url)
2070 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2071 parts_sizes = self._signature_cache_id(encrypted_sig)
2072 self.to_screen('{%s} signature length %s, %s' %
2073 (format_id, parts_sizes, player_desc))
2074
2075 signature = self._decrypt_signature(
2076 encrypted_sig, video_id, player_url, age_gate)
2077 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2078 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2079 if 'ratebypass' not in url:
2080 url += '&ratebypass=yes'
c9afb51c 2081
94278f72
YCH
2082 dct = {
2083 'format_id': format_id,
2084 'url': url,
2085 'player_url': player_url,
2086 }
2087 if format_id in self._formats:
2088 dct.update(self._formats[format_id])
3318832e 2089 if format_id in formats_spec:
2090 dct.update(formats_spec[format_id])
94278f72 2091
aabc2be6 2092 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2093 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2094 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2095 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2096 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2097
bf1317d2
S
2098 if width is None:
2099 width = int_or_none(fmt.get('width'))
2100 if height is None:
2101 height = int_or_none(fmt.get('height'))
2102
c63ca0ee
S
2103 filesize = int_or_none(url_data.get(
2104 'clen', [None])[0]) or _extract_filesize(url)
2105
bf1317d2
S
2106 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2107 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2108
4878759f
S
2109 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2110 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2111 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2112
94278f72 2113 more_fields = {
c63ca0ee 2114 'filesize': filesize,
bf1317d2 2115 'tbr': tbr,
c9afb51c
AH
2116 'width': width,
2117 'height': height,
bf1317d2
S
2118 'fps': fps,
2119 'format_note': quality_label or quality,
c9afb51c 2120 }
94278f72
YCH
2121 for key, value in more_fields.items():
2122 if value:
2123 dct[key] = value
bf1317d2 2124 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2125 if type_:
2126 type_split = type_.split(';')
2127 kind_ext = type_split[0].split('/')
2128 if len(kind_ext) == 2:
94278f72
YCH
2129 kind, _ = kind_ext
2130 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2131 if kind in ('audio', 'video'):
2132 codecs = None
2133 for mobj in re.finditer(
2134 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2135 if mobj.group('key') == 'codecs':
2136 codecs = mobj.group('val')
2137 break
2138 if codecs:
6310acf5 2139 dct.update(parse_codecs(codecs))
e4a60912
S
2140 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2141 dct['downloader_options'] = {
2142 # Youtube throttles chunks >~10M
2143 'http_chunk_size': 10485760,
2144 }
aabc2be6 2145 formats.append(dct)
c5e8d7af 2146 else:
c3e54389
S
2147 manifest_url = (
2148 url_or_none(try_get(
2149 player_response,
2150 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2151 compat_str))
2152 or url_or_none(try_get(
c3e54389
S
2153 video_info, lambda x: x['hlsvp'][0], compat_str)))
2154 if manifest_url:
2155 formats = []
2156 m3u8_formats = self._extract_m3u8_formats(
2157 manifest_url, video_id, 'mp4', fatal=False)
2158 for a_format in m3u8_formats:
2159 itag = self._search_regex(
2160 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2161 if itag:
2162 a_format['format_id'] = itag
2163 if itag in self._formats:
2164 dct = self._formats[itag].copy()
2165 dct.update(a_format)
2166 a_format = dct
2167 a_format['player_url'] = player_url
2168 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2169 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2170 if self._downloader.params.get('youtube_include_hls_manifest', True):
2171 formats.append(a_format)
c3e54389 2172 else:
13577349 2173 error_message = extract_unavailable_message()
a0566bbf 2174 if not error_message:
2175 reason_list = try_get(
2176 player_response,
2177 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2178 list) or []
2179 for reason in reason_list:
2180 if not isinstance(reason, dict):
2181 continue
2182 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2183 if reason_text:
2184 if not error_message:
2185 error_message = ''
2186 error_message += reason_text
2187 if error_message:
2188 error_message = clean_html(error_message)
c3e54389 2189 if not error_message:
13577349
S
2190 error_message = clean_html(try_get(
2191 player_response, lambda x: x['playabilityStatus']['reason'],
2192 compat_str))
2193 if not error_message:
2194 error_message = clean_html(
2195 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2196 if error_message:
2197 raise ExtractorError(error_message, expected=True)
2198 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2199
7e72694b 2200 # uploader
dbdaaa23
S
2201 video_uploader = try_get(
2202 video_info, lambda x: x['author'][0],
2203 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2204 if video_uploader:
2205 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2206 else:
2207 self._downloader.report_warning('unable to extract uploader name')
2208
2209 # uploader_id
2210 video_uploader_id = None
2211 video_uploader_url = None
2212 mobj = re.search(
2213 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2214 video_webpage)
2215 if mobj is not None:
2216 video_uploader_id = mobj.group('uploader_id')
2217 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2218 else:
2219 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2220 if owner_profile_url:
2221 video_uploader_id = self._search_regex(
2222 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2223 default=None)
2224 video_uploader_url = owner_profile_url
7e72694b 2225
b45a9e69 2226 channel_id = (
3089bc74
S
2227 str_or_none(video_details.get('channelId'))
2228 or self._html_search_meta(
2229 'channelId', video_webpage, 'channel id', default=None)
2230 or self._search_regex(
b45a9e69 2231 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2232 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2233 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2234
b477fc13
S
2235 thumbnails = []
2236 thumbnails_list = try_get(
2237 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2238 for t in thumbnails_list:
2239 if not isinstance(t, dict):
2240 continue
2241 thumbnail_url = url_or_none(t.get('url'))
2242 if not thumbnail_url:
2243 continue
2244 thumbnails.append({
2245 'url': thumbnail_url,
2246 'width': int_or_none(t.get('width')),
2247 'height': int_or_none(t.get('height')),
2248 })
2249
2250 if not thumbnails:
7e72694b 2251 video_thumbnail = None
b477fc13
S
2252 # We try first to get a high quality image:
2253 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2254 video_webpage, re.DOTALL)
2255 if m_thumb is not None:
2256 video_thumbnail = m_thumb.group(1)
2257 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2258 if thumbnail_url:
2259 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2260 if video_thumbnail:
2261 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2262
2263 # upload date
2264 upload_date = self._html_search_meta(
2265 'datePublished', video_webpage, 'upload date', default=None)
2266 if not upload_date:
2267 upload_date = self._search_regex(
2268 [r'(?s)id="eow-date.*?>(.*?)</span>',
2269 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2270 video_webpage, 'upload date', default=None)
37357d21
S
2271 if not upload_date:
2272 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2273 upload_date = unified_strdate(upload_date)
2274
2275 video_license = self._html_search_regex(
2276 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2277 video_webpage, 'license', default=None)
2278
2279 m_music = re.search(
2280 r'''(?x)
2281 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2282 <ul[^>]*>\s*
2283 <li>(?P<title>.+?)
2284 by (?P<creator>.+?)
2285 (?:
2286 \(.+?\)|
2287 <a[^>]*
2288 (?:
2289 \bhref=["\']/red[^>]*>| # drop possible
2290 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2291 )
2292 .*?
2293 )?</li
2294 ''',
2295 video_webpage)
2296 if m_music:
2297 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2298 video_creator = clean_html(m_music.group('creator'))
2299 else:
2300 video_alt_title = video_creator = None
2301
2302 def extract_meta(field):
2303 return self._html_search_regex(
2304 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2305 video_webpage, field, default=None)
2306
2307 track = extract_meta('Song')
2308 artist = extract_meta('Artist')
92bc97d3 2309 album = extract_meta('Album')
822b9d9c
RA
2310
2311 # Youtube Music Auto-generated description
92bc97d3 2312 release_date = release_year = None
822b9d9c 2313 if video_description:
38d70284 2314 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c
RA
2315 if mobj:
2316 if not track:
2317 track = mobj.group('track').strip()
2318 if not artist:
2319 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2320 if not album:
2321 album = mobj.group('album'.strip())
822b9d9c
RA
2322 release_year = mobj.group('release_year')
2323 release_date = mobj.group('release_date')
2324 if release_date:
2325 release_date = release_date.replace('-', '')
2326 if not release_year:
2327 release_year = int(release_date[:4])
2328 if release_year:
2329 release_year = int(release_year)
7e72694b 2330
38d70284 2331 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2332 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2333 for content in contents:
2334 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2335 multiple_songs = False
2336 for row in rows:
2337 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2338 multiple_songs = True
2339 break
2340 for row in rows:
2341 mrr = row.get('metadataRowRenderer') or {}
2342 mrr_title = try_get(
2343 mrr, lambda x: x['title']['simpleText'], compat_str)
2344 mrr_contents = try_get(
2345 mrr, lambda x: x['contents'][0], dict) or {}
2346 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2347 if not (mrr_title and mrr_contents_text):
2348 continue
2349 if mrr_title == 'License':
2350 video_license = mrr_contents_text
2351 elif not multiple_songs:
2352 if mrr_title == 'Album':
2353 album = mrr_contents_text
2354 elif mrr_title == 'Artist':
2355 artist = mrr_contents_text
2356 elif mrr_title == 'Song':
2357 track = mrr_contents_text
9322f116 2358
7e72694b
S
2359 m_episode = re.search(
2360 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2361 video_webpage)
2362 if m_episode:
c2dd2dc0 2363 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2364 season_number = int(m_episode.group('season'))
2365 episode_number = int(m_episode.group('episode'))
2366 else:
2367 series = season_number = episode_number = None
2368
2369 m_cat_container = self._search_regex(
2370 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2371 video_webpage, 'categories', default=None)
dbeafce5 2372 category = None
7e72694b
S
2373 if m_cat_container:
2374 category = self._html_search_regex(
2375 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2376 default=None)
dbeafce5
S
2377 if not category:
2378 category = try_get(
2379 microformat, lambda x: x['category'], compat_str)
2380 video_categories = None if category is None else [category]
7e72694b
S
2381
2382 video_tags = [
2383 unescapeHTML(m.group('content'))
2384 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2385 if not video_tags:
2386 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2387
2388 def _extract_count(count_name):
2389 return str_to_int(self._search_regex(
a0566bbf 2390 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2391 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
7e72694b
S
2392 video_webpage, count_name, default=None))
2393
2394 like_count = _extract_count('like')
2395 dislike_count = _extract_count('dislike')
2396
dbdaaa23
S
2397 if view_count is None:
2398 view_count = str_to_int(self._search_regex(
2399 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2400 'view count', default=None))
2401
bf3c9326
S
2402 average_rating = (
2403 float_or_none(video_details.get('averageRating'))
2404 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2405
7e72694b 2406 # subtitles
321bf820 2407 video_subtitles = self.extract_subtitles(
2408 video_id, video_webpage, has_live_chat_replay)
29f7c58a 2409 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
7e72694b
S
2410
2411 video_duration = try_get(
2412 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2413 if not video_duration:
2414 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2415 if not video_duration:
2416 video_duration = parse_duration(self._html_search_meta(
2417 'duration', video_webpage, 'video duration'))
2418
b84071c0
JP
2419 # Get Subscriber Count of channel
2420 subscriber_count = parse_count(self._search_regex(
2421 r'"text":"([\d\.]+\w?) subscribers"',
2422 video_webpage,
2423 'subscriber count',
2424 default=None
2425 ))
2426
7e72694b
S
2427 # annotations
2428 video_annotations = None
2429 if self._downloader.params.get('writeannotations', False):
29f7c58a 2430 xsrf_token = None
2431 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2432 if ytcfg:
2433 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2434 if not xsrf_token:
2435 xsrf_token = self._search_regex(
2436 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2437 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
64b6a4e9
RA
2438 invideo_url = try_get(
2439 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2440 if xsrf_token and invideo_url:
29f7c58a 2441 xsrf_field_name = None
2442 if ytcfg:
2443 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2444 if not xsrf_field_name:
2445 xsrf_field_name = self._search_regex(
2446 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2447 video_webpage, 'xsrf field name',
2448 group='xsrf_field_name', default='session_token')
64b6a4e9
RA
2449 video_annotations = self._download_webpage(
2450 self._proto_relative_url(invideo_url),
2451 video_id, note='Downloading annotations',
2452 errnote='Unable to download video annotations', fatal=False,
2453 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2454
84213ea8 2455 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2456
dd27fd17 2457 # Look for the DASH manifest
203fb43f 2458 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2459 dash_mpd_fatal = True
8ff648e4 2460 for mpd_url in dash_mpds:
d8d24a92 2461 dash_formats = {}
774e208f 2462 try:
05d0d131
YCH
2463 def decrypt_sig(mobj):
2464 s = mobj.group(1)
2465 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2466 return '/signature/%s' % dec_s
2467
8ff648e4 2468 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2469
8ff648e4 2470 for df in self._extract_mpd_formats(
2471 mpd_url, video_id, fatal=dash_mpd_fatal,
2472 formats_dict=self._formats):
c63ca0ee
S
2473 if not df.get('filesize'):
2474 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2475 # Do not overwrite DASH format found in some previous DASH manifest
2476 if df['format_id'] not in dash_formats:
2477 dash_formats[df['format_id']] = df
77c6fb5b
S
2478 # Additional DASH manifests may end up in HTTP Error 403 therefore
2479 # allow them to fail without bug report message if we already have
2480 # some DASH manifest succeeded. This is temporary workaround to reduce
2481 # burst of bug reports until we figure out the reason and whether it
2482 # can be fixed at all.
2483 dash_mpd_fatal = False
774e208f
PH
2484 except (ExtractorError, KeyError) as e:
2485 self.report_warning(
2486 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2487 if dash_formats:
04b3b3df
JMF
2488 # Remove the formats we found through non-DASH, they
2489 # contain less info and it can be wrong, because we use
2490 # fixed values (for example the resolution). See
067aa17e 2491 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2492 # example.
d80265cc 2493 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2494 formats.extend(dash_formats.values())
d80044c2 2495
6271f1ca
PH
2496 # Check for malformed aspect ratio
2497 stretched_m = re.search(
2498 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2499 video_webpage)
2500 if stretched_m:
313dfc45
LL
2501 w = float(stretched_m.group('w'))
2502 h = float(stretched_m.group('h'))
5faf9fed
S
2503 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2504 # We will only process correct ratios.
313dfc45 2505 if w > 0 and h > 0:
41f24c32 2506 ratio = w / h
313dfc45
LL
2507 for f in formats:
2508 if f.get('vcodec') != 'none':
2509 f['stretched_ratio'] = ratio
6271f1ca 2510
026fbedc 2511 if not formats:
43ebf77d
S
2512 if 'reason' in video_info:
2513 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2514 regions_allowed = self._html_search_meta(
2515 'regionsAllowed', video_webpage, default=None)
2516 countries = regions_allowed.split(',') if regions_allowed else None
2517 self.raise_geo_restricted(
2518 msg=video_info['reason'][0], countries=countries)
2519 reason = video_info['reason'][0]
2520 if 'Invalid parameters' in reason:
2521 unavailable_message = extract_unavailable_message()
2522 if unavailable_message:
2523 reason = unavailable_message
2524 raise ExtractorError(
2525 'YouTube said: %s' % reason,
2526 expected=True, video_id=video_id)
2527 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2528 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2529
4bcc7bd1 2530 self._sort_formats(formats)
4ea3be0a 2531
21c340b8 2532 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2533
4ea3be0a 2534 return {
8bcc8756
JW
2535 'id': video_id,
2536 'uploader': video_uploader,
2537 'uploader_id': video_uploader_id,
fd050249 2538 'uploader_url': video_uploader_url,
dd4c4492
S
2539 'channel_id': channel_id,
2540 'channel_url': channel_url,
8bcc8756 2541 'upload_date': upload_date,
7caf9830 2542 'license': video_license,
936784b2 2543 'creator': video_creator or artist,
8bcc8756 2544 'title': video_title,
936784b2 2545 'alt_title': video_alt_title or track,
b477fc13 2546 'thumbnails': thumbnails,
8bcc8756
JW
2547 'description': video_description,
2548 'categories': video_categories,
000b6b5a 2549 'tags': video_tags,
8bcc8756 2550 'subtitles': video_subtitles,
360e1ca5 2551 'automatic_captions': automatic_captions,
8bcc8756
JW
2552 'duration': video_duration,
2553 'age_limit': 18 if age_gate else 0,
2554 'annotations': video_annotations,
9cafc3fd 2555 'chapters': chapters,
7e8c0af0 2556 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2557 'view_count': view_count,
4ea3be0a 2558 'like_count': like_count,
2559 'dislike_count': dislike_count,
bf3c9326 2560 'average_rating': average_rating,
8bcc8756 2561 'formats': formats,
2fe1ff85 2562 'is_live': is_live,
7c80519c 2563 'start_time': start_time,
297a564b 2564 'end_time': end_time,
12afdc2a
S
2565 'series': series,
2566 'season_number': season_number,
2567 'episode_number': episode_number,
936784b2
S
2568 'track': track,
2569 'artist': artist,
5caabd3c 2570 'album': album,
2571 'release_date': release_date,
2572 'release_year': release_year,
b84071c0 2573 'subscriber_count': subscriber_count,
5ac23244 2574 'playable_in_embed': playable_in_embed,
4ea3be0a 2575 }
c5e8d7af 2576
5f6a1245 2577
8bdd16b4 2578class YoutubeTabIE(YoutubeBaseInfoExtractor):
2579 IE_DESC = 'YouTube.com tab'
70d5c17b 2580 _VALID_URL = r'''(?x)
2581 https?://
2582 (?:\w+\.)?
2583 (?:
2584 youtube(?:kids)?\.com|
2585 invidio\.us
2586 )/
2587 (?:
2588 (?:channel|c|user)/|
2589 (?P<not_channel>
3d3dddc9 2590 feed/|
70d5c17b 2591 (?:playlist|watch)\?.*?\blist=
2592 )|
29f7c58a 2593 (?!(?:%s)\b) # Direct URLs
70d5c17b 2594 )
2595 (?P<id>[^/?\#&]+)
2596 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2597 IE_NAME = 'youtube:tab'
2598
81127aa5 2599 _TESTS = [{
8bdd16b4 2600 # playlists, multipage
2601 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2602 'playlist_mincount': 94,
2603 'info_dict': {
2604 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2605 'title': 'Игорь Клейнер - Playlists',
2606 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2607 },
2608 }, {
2609 # playlists, multipage, different order
2610 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2611 'playlist_mincount': 94,
2612 'info_dict': {
2613 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2614 'title': 'Игорь Клейнер - Playlists',
2615 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2616 },
2617 }, {
2618 # playlists, singlepage
2619 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2620 'playlist_mincount': 4,
2621 'info_dict': {
2622 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2623 'title': 'ThirstForScience - Playlists',
2624 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2625 }
2626 }, {
2627 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2628 'only_matching': True,
2629 }, {
2630 # basic, single video playlist
0e30a7b9 2631 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2632 'info_dict': {
0e30a7b9 2633 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2634 'uploader': 'Sergey M.',
2635 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2636 'title': 'youtube-dl public playlist',
81127aa5 2637 },
0e30a7b9 2638 'playlist_count': 1,
9291475f 2639 }, {
8bdd16b4 2640 # empty playlist
0e30a7b9 2641 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2642 'info_dict': {
0e30a7b9 2643 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2644 'uploader': 'Sergey M.',
2645 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2646 'title': 'youtube-dl empty playlist',
9291475f
PH
2647 },
2648 'playlist_count': 0,
2649 }, {
8bdd16b4 2650 # Home tab
2651 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2652 'info_dict': {
8bdd16b4 2653 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2654 'title': 'lex will - Home',
2655 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2656 },
8bdd16b4 2657 'playlist_mincount': 2,
9291475f 2658 }, {
8bdd16b4 2659 # Videos tab
2660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2661 'info_dict': {
8bdd16b4 2662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2663 'title': 'lex will - Videos',
2664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2665 },
8bdd16b4 2666 'playlist_mincount': 975,
9291475f 2667 }, {
8bdd16b4 2668 # Videos tab, sorted by popular
2669 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2670 'info_dict': {
8bdd16b4 2671 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2672 'title': 'lex will - Videos',
2673 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2674 },
8bdd16b4 2675 'playlist_mincount': 199,
9291475f 2676 }, {
8bdd16b4 2677 # Playlists tab
2678 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2679 'info_dict': {
8bdd16b4 2680 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2681 'title': 'lex will - Playlists',
2682 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2683 },
8bdd16b4 2684 'playlist_mincount': 17,
ac7553d0 2685 }, {
8bdd16b4 2686 # Community tab
2687 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2688 'info_dict': {
8bdd16b4 2689 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2690 'title': 'lex will - Community',
2691 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2692 },
2693 'playlist_mincount': 18,
87dadd45 2694 }, {
8bdd16b4 2695 # Channels tab
2696 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2697 'info_dict': {
8bdd16b4 2698 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2699 'title': 'lex will - Channels',
2700 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2701 },
2702 'playlist_mincount': 138,
6b08cdf6 2703 }, {
a0566bbf 2704 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2705 'only_matching': True,
2706 }, {
a0566bbf 2707 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2708 'only_matching': True,
2709 }, {
a0566bbf 2710 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2711 'only_matching': True,
2712 }, {
2713 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2714 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2715 'info_dict': {
2716 'title': '29C3: Not my department',
2717 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2718 'uploader': 'Christiaan008',
2719 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2720 },
2721 'playlist_count': 96,
2722 }, {
2723 'note': 'Large playlist',
2724 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2725 'info_dict': {
8bdd16b4 2726 'title': 'Uploads from Cauchemar',
2727 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2728 'uploader': 'Cauchemar',
2729 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2730 },
8bdd16b4 2731 'playlist_mincount': 1123,
2732 }, {
2733 # even larger playlist, 8832 videos
2734 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2735 'only_matching': True,
4b7df0d3
JMF
2736 }, {
2737 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2738 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2739 'info_dict': {
acf757f4
PH
2740 'title': 'Uploads from Interstellar Movie',
2741 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2742 'uploader': 'Interstellar Movie',
8bdd16b4 2743 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2744 },
481cc733 2745 'playlist_mincount': 21,
8bdd16b4 2746 }, {
2747 # https://github.com/ytdl-org/youtube-dl/issues/21844
2748 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2749 'info_dict': {
2750 'title': 'Data Analysis with Dr Mike Pound',
2751 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2752 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2753 'uploader': 'Computerphile',
2754 },
2755 'playlist_mincount': 11,
2756 }, {
a0566bbf 2757 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2758 'only_matching': True,
dacb3a86
S
2759 }, {
2760 # Playlist URL that does not actually serve a playlist
2761 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2762 'info_dict': {
2763 'id': 'FqZTN594JQw',
2764 'ext': 'webm',
2765 'title': "Smiley's People 01 detective, Adventure Series, Action",
2766 'uploader': 'STREEM',
2767 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2769 'upload_date': '20150526',
2770 'license': 'Standard YouTube License',
2771 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2772 'categories': ['People & Blogs'],
2773 'tags': list,
dbdaaa23 2774 'view_count': int,
dacb3a86
S
2775 'like_count': int,
2776 'dislike_count': int,
2777 },
2778 'params': {
2779 'skip_download': True,
2780 },
13a75688 2781 'skip': 'This video is not available.',
dacb3a86 2782 'add_ie': [YoutubeIE.ie_key()],
481cc733 2783 }, {
8bdd16b4 2784 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2785 'only_matching': True,
66b48727 2786 }, {
8bdd16b4 2787 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2788 'only_matching': True,
a0566bbf 2789 }, {
2790 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2791 'info_dict': {
2792 'id': '9Auq9mYxFEE',
2793 'ext': 'mp4',
2794 'title': 'Watch Sky News live',
2795 'uploader': 'Sky News',
2796 'uploader_id': 'skynews',
2797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2798 'upload_date': '20191102',
2799 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2800 'categories': ['News & Politics'],
2801 'tags': list,
2802 'like_count': int,
2803 'dislike_count': int,
2804 },
2805 'params': {
2806 'skip_download': True,
2807 },
2808 }, {
2809 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2810 'info_dict': {
2811 'id': 'a48o2S1cPoo',
2812 'ext': 'mp4',
2813 'title': 'The Young Turks - Live Main Show',
2814 'uploader': 'The Young Turks',
2815 'uploader_id': 'TheYoungTurks',
2816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2817 'upload_date': '20150715',
2818 'license': 'Standard YouTube License',
2819 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2820 'categories': ['News & Politics'],
2821 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2822 'like_count': int,
2823 'dislike_count': int,
2824 },
2825 'params': {
2826 'skip_download': True,
2827 },
2828 'only_matching': True,
2829 }, {
2830 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2831 'only_matching': True,
2832 }, {
2833 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2834 'only_matching': True,
3d3dddc9 2835 }, {
2836 'url': 'https://www.youtube.com/feed/trending',
2837 'only_matching': True,
2838 }, {
2839 # needs auth
2840 'url': 'https://www.youtube.com/feed/library',
2841 'only_matching': True,
2842 }, {
2843 # needs auth
2844 'url': 'https://www.youtube.com/feed/history',
2845 'only_matching': True,
2846 }, {
2847 # needs auth
2848 'url': 'https://www.youtube.com/feed/subscriptions',
2849 'only_matching': True,
2850 }, {
2851 # needs auth
2852 'url': 'https://www.youtube.com/feed/watch_later',
2853 'only_matching': True,
2854 }, {
2855 # no longer available?
2856 'url': 'https://www.youtube.com/feed/recommended',
2857 'only_matching': True,
29f7c58a 2858 }, {
2859 # inline playlist with not always working continuations
2860 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2861 'only_matching': True,
2862 }, {
2863 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2864 'only_matching': True,
2865 }, {
2866 'url': 'https://www.youtube.com/course',
2867 'only_matching': True,
2868 }, {
2869 'url': 'https://www.youtube.com/zsecurity',
2870 'only_matching': True,
2871 }, {
2872 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2873 'only_matching': True,
2874 }, {
2875 'url': 'https://www.youtube.com/TheYoungTurks/live',
2876 'only_matching': True,
2877 }]
2878
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE reports as suitable is declined here; every
    other URL is judged by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2883
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The `channelId` meta tag is tried first.  When it is absent, the
    first available URL-bearing meta tag (og:url, al:*, twitter:*) is
    looked up and the id is taken from the `/channel/<id>` part of it.

    NOTE(review): neither fallback lookup passes a `default`, so a miss
    is handled by the helpers' own failure behaviour (presumably an
    ExtractorError) - confirm against InfoExtractor.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to live inside the group: a repeated group only
    # keeps its last iteration, which would leave a single character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2896
8bdd16b4 2897 @staticmethod
2898 def _extract_grid_item_renderer(item):
2899 for item_kind in ('Playlist', 'Video', 'Channel'):
2900 renderer = item.get('grid%sRenderer' % item_kind)
2901 if renderer:
2902 return renderer
2903
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of *grid_renderer*['items'].

    Non-dict items and items without a dict grid*Renderer are ignored.
    A renderer is checked, in turn, for a playlist id, a video id and a
    channel id; each id that is present produces its own result, so the
    three checks are deliberately not mutually exclusive.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'],
            compat_str)

        # Playlist item: delegate to this same extractor.
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            playlist_url = (
                'https://www.youtube.com/playlist?list=%s' % playlist_id)
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=playlist_id, video_title=runs_title)

        # Video item.
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # Channel item: its title is read from `simpleText`, not `runs`.
        channel_id = item_renderer.get('channelId')
        if channel_id:
            simple_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'],
                compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % channel_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(),
                video_title=simple_title)
2932
3d3dddc9 2933 def _shelf_entries_from_content(self, shelf_renderer):
2934 content = shelf_renderer.get('content')
2935 if not isinstance(content, dict):
8bdd16b4 2936 return
3d3dddc9 2937 renderer = content.get('gridRenderer')
2938 if renderer:
2939 # TODO: add support for nested playlists so each shelf is processed
2940 # as separate playlist
2941 # TODO: this includes only first N items
2942 for entry in self._grid_entries(renderer):
2943 yield entry
2944 renderer = content.get('horizontalListRenderer')
2945 if renderer:
2946 # TODO
2947 pass
8bdd16b4 2948
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf endpoint carries a web URL, a url result pointing at
    it (joined onto https://www.youtube.com) is yielded first.  With
    *skip_channels* set, a shelf whose URL contains '/channels?' yields
    nothing at all.  Entries taken from the shelf content follow.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by their URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work
        points_to_channels = '/channels?' in shelf_url
        if skip_channels and points_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'],
            compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2966
8bdd16b4 2967 def _playlist_entries(self, video_list_renderer):
2968 for content in video_list_renderer['contents']:
2969 if not isinstance(content, dict):
2970 continue
2971 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2972 if not isinstance(renderer, dict):
2973 continue
2974 video_id = renderer.get('videoId')
2975 if not video_id:
2976 continue
2977 yield self._extract_video(renderer)
07aeced6 2978
3d3dddc9 2979 r""" # Not needed in the new implementation
3462ffa8 2980 def _itemSection_entries(self, item_sect_renderer):
2981 for content in item_sect_renderer['contents']:
2982 if not isinstance(content, dict):
2983 continue
2984 renderer = content.get('videoRenderer', {})
2985 if not isinstance(renderer, dict):
2986 continue
2987 video_id = renderer.get('videoId')
2988 if not video_id:
2989 continue
2990 yield self._extract_video(renderer)
3d3dddc9 2991 """
3462ffa8 2992
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in content.videoRenderer of a rich item.

    Nothing is yielded when that renderer is missing, is not a dict, or
    has no truthy `videoId`.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'],
        dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3000
8bdd16b4 3001 def _video_entry(self, video_renderer):
3002 video_id = video_renderer.get('videoId')
3003 if video_id:
3004 return self._extract_video(video_renderer)
dacb3a86 3005
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (backstageAttachment.videoRenderer) comes first,
    followed by one url result per inline link in the post text that
    YoutubeIE can handle.  An inline link to the video that was already
    yielded as the attachment is skipped.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    # Id of the attachment that was actually yielded; stays None when the
    # post has no usable attachment so that no inline link is filtered.
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Pass the id parsed from the link itself, not the attachment's.
        yield self.url_result(
            ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3034
8bdd16b4 3035 def _post_thread_continuation_entries(self, post_thread_continuation):
3036 contents = post_thread_continuation.get('contents')
3037 if not isinstance(contents, list):
3038 return
3039 for content in contents:
3040 renderer = content.get('backstagePostThreadRenderer')
3041 if not isinstance(renderer, dict):
3042 continue
3043 for entry in self._post_thread_entries(renderer):
3044 yield entry
07aeced6 3045
29f7c58a 3046 @staticmethod
3047 def _build_continuation_query(continuation, ctp=None):
3048 query = {
3049 'ctoken': continuation,
3050 'continuation': continuation,
3051 }
3052 if ctp:
3053 query['itct'] = ctp
3054 return query
3055
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from continuations[0].

    The token and click tracking params are read from the
    `nextContinuationData` dict.  None is returned when that dict or its
    `continuation` token is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'],
        dict)
    token = data.get('continuation') if data else None
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
    return None
c5e8d7af 3067
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    `nextContinuationData` is preferred.  Otherwise the `contents` list
    is scanned for the first continuationItemRenderer whose endpoint
    carries a continuationCommand token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    contents = renderer.get('contents')
    if not isinstance(contents, list):
        return None
    for item in contents:
        if not isinstance(item, dict):
            continue
        endpoint = try_get(
            item,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        # Only look for a token when an endpoint dict was actually found.
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'],
            compat_str) if endpoint else None
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3090
def _entries(self, tab, identity_token):
    """Yield the entries of ``tab``, following continuation pages.

    The content embedded in ``tab['content']`` is walked first.  Further
    pages are then requested from ``browse_ajax`` for as long as a
    continuation query can be extracted from the previously handled page.

    ``identity_token``, when truthy, is sent as the
    ``x-youtube-identity-token`` request header.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'] and records the continuation
        # query for the next page in continuation_list[0] as a side effect.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # No item section: the only other shape handled here is a
                # rich item.
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    is_channels_tab = tab.get('title') == 'Channels'
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            # Fall back to a continuation attached to the item section.
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer.
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict)
        if continuation_contents:
            continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
            if continuation_renderer:
                for entry in self._playlist_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('gridContinuation')
            if continuation_renderer:
                for entry in self._grid_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('itemSectionContinuation')
            if continuation_renderer:
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
            if continuation_renderer:
                # Reset the shared slot so a stale query from the previous
                # page is never reused.
                continuation_list = [None]
                for entry in extract_entries(continuation_renderer):
                    yield entry
                continuation = continuation_list[0]
                continue

        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        if continuation_items:
            continuation_item = continuation_items[0]
            if not isinstance(continuation_item, dict):
                # No new continuation was extracted from this page, so
                # looping again would request the very same page forever.
                break
            renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
            if renderer:
                video_list_renderer = {'contents': continuation_items}
                for entry in self._playlist_entries(video_list_renderer):
                    yield entry
                continuation = self._extract_continuation(video_list_renderer)
                continue
        break
9558dcec 3234
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when none of ``tabs`` is marked as selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3242
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of ``data``.

    Returns a dict that may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; it is empty when no owner is found in the sidebar.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return info
3265
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build a playlist result from the selected tab of a tabbed page.

    Title, description and id come from the channel metadata when present
    and are overridden by the playlist metadata when that is present too.
    Uploader fields from the sidebar are merged into the result.
    """
    selected_tab = self._extract_selected_tab(tabs)
    playlist_id = title = description = None

    channel_md = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_md:
        title = channel_md.get('title') or item_id
        tab_title = selected_tab.get('title')
        if tab_title:
            title += ' - %s' % tab_title
        description = channel_md.get('description')
        playlist_id = channel_md.get('externalId')

    playlist_md = try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if playlist_md:
        title, description, playlist_id = playlist_md.get('title'), None, item_id

    # Defaults for pages that expose neither kind of metadata.
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = "Youtube " + playlist_id.title()

    result = self.playlist_result(
        self._entries(selected_tab, identity_token),
        playlist_id=playlist_id, playlist_title=title,
        playlist_description=description)
    result.update(self._extract_uploader(data))
    return result
73c4ac2c 3295
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Handle a playlist rendered inline next to a video.

    When the playlist advertises its own URL and it differs from ``url``,
    a URL result pointing at the tab extractor is returned; otherwise the
    inline playlist entries are returned directly.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3313
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs found in ``data['alerts']``.

    Each member of the alerts list maps renderer names to alert mappings.
    For every alert that has a ``type``, the ``simpleText`` message (if any)
    is yielded, followed by the text of each entry in ``runs``.  Members
    and alerts that are not dicts are ignored.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Remote JSON: a non-dict value here would otherwise raise
            # AttributeError on .get() and abort the extraction.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
3331
def _extract_identity_token(self, webpage, item_id):
    """Return the ``ID_TOKEN`` value for ``webpage``, or None.

    The parsed ytcfg is consulted first; a regex search over the raw page
    is the fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3341
def _real_extract(self, url):
    """Extract a tab, playlist or single video from a YouTube page URL.

    Depending on what the downloaded page contains this returns the result
    of ``_extract_from_tabs``, the result of ``_extract_from_playlist`` or
    a URL result delegating to ``YoutubeIE``.

    Raises ExtractorError when the page reports an error alert or when
    nothing recognisable is found.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with.
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # Matches when _VALID_URL is followed only by an optional slash and then
    # a query/fragment or the end of the string.
    # NOTE(review): the 'not_channel' group comes from _VALID_URL, which is
    # defined outside this method.
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        # Redirect the bare page to its /videos tab, keeping query/fragment.
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    # A watch URL without a video id: use the playlist id if there is one.
    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        # Both ids present: the noplaylist parameter decides which one wins.
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() == 'error':
            # Only the last error alert is raised; earlier ones are demoted
            # to warnings as soon as a later one shows up.
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3401
c5e8d7af 3402
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``list=`` URLs, delegating to the tab extractor."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL the tab extractor already claims."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical ``/playlist`` URL handled by the tab extractor."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string of its own.
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3477
3478
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL for the tab extractor."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3517
3518
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's page URL."""

    _VALID_URL = r'ytuser:(?P<id>.+)'
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the tab extractor with the expanded user URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3532
b05654f0 3533
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` shorthand to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the tab extractor with the ``LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3551
3552
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backed by the ``youtubei/v1/search`` JSON endpoint."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the "params" field of the request body.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``.

        Pages are requested one after another; each following request
        carries the continuation token found in the previous response.
        Iteration stops when a page cannot be downloaded, has no section
        list, or offers no continuation token.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page nests the section list under "contents";
            # continuation pages deliver it through an append action.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Look for the token before the itemSectionRenderer check:
                # the entry carrying continuationItemRenderer has no item
                # section and would otherwise be skipped, ending the search
                # after the first page.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3633
c9ae7b95 3634
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that sends ``CAI%3D`` as search params."""

    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
75dff0ee 3640
c9ae7b95 3641
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a ``youtube.com/results`` URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and ``sp`` value from the URL, then run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        sp_values = params.get('sp', ('',))
        # Stored on the instance because _entries reads it from there.
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3667
3668
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed shortcuts.
    A subclass has to provide _FEED_NAME, which names the feed page.
    """
    _TESTS = []
    # _MAX_PAGES = 5
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # The given url is ignored; the feed page is derived from _FEED_NAME.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3689
3690
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shorthand to the ``WL`` playlist URL."""

    _VALID_URL = r':ytwatchlater'
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the tab extractor with the ``WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3703
3704
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed shortcut for ``recommended``; also matches the bare site URL."""

    _FEED_NAME = 'recommended'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3719
1ed5b5c9 3720
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed shortcut for ``subscriptions`` (``:ytsubs``)."""

    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3732
1ed5b5c9 3733
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed shortcut for ``history`` (``:ythistory``)."""

    _FEED_NAME = 'history'
    _VALID_URL = r':ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3742
3743
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs with no video id and report them as unquoted."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3791
3792
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value has only 1 to 10 characters."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
8bdd16b4 3808
3809
# Do YouTube show URLs even exist anymore? I couldn't find any.
3811r'''
3812class YoutubeShowIE(YoutubeTabIE):
8bdd16b4 3813 IE_DESC = 'YouTube.com (multi-season) shows'
3814 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3815 IE_NAME = 'youtube:show'
3816 _TESTS = [{
3817 'url': 'https://www.youtube.com/show/airdisasters',
3818 'playlist_mincount': 5,
3819 'info_dict': {
3820 'id': 'airdisasters',
3821 'title': 'Air Disasters',
3822 }
3823 }]
3824
3825 def _real_extract(self, url):
3826 playlist_id = self._match_id(url)
3827 return super(YoutubeShowIE, self)._real_extract(
3828 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3462ffa8 3829'''