]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
[version] update
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
2d30521a 34 float_or_none,
4bb4a188 35 get_element_by_id,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
b84071c0 39 parse_count,
7c80519c 40 parse_duration,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
81c2f20b 51 uppercase_escape,
21c340b8 52 url_or_none,
6e6bc8da 53 urlencode_postdata,
8bdd16b4 54 urljoin,
c5e8d7af
PH
55)
56
5f6a1245 57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Alternation of first path components that are not channel/user names
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list of url_result entries (one per video id) for the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) so the documented contract holds
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus f_req (JSON-encoded) to url.

            Returns the parsed JSON response, or False on failure (fatal=False).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Same "continue" URL is sent in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) so the documented contract holds
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: '%' binds tighter than the conditional expression,
            # so without the parentheses the prefix was lost for other messages.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with a private copy of ``query``.

        A missing or None ``query`` is replaced by an empty dict, so the
        caller's dict is never shared with the parent call.
        """
        query = (kwargs.get('query') or {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Search webpage for a ytInitialData assignment and parse it as JSON.

        Returns None when no assignment is found; parsing is non-fatal.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt login, unless no downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        # The result is intentionally ignored: nothing follows the login attempt
        self._login()

    # Base request body for _call_api
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST a JSON body to the youtubei/v1 endpoint ``ep`` and return the parsed response.

        The body is a shallow copy of _DEFAULT_API_DATA updated with ``query``,
        so top-level keys in ``query`` override the defaults.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON object from webpage.

        The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
        the bare _YT_INITIAL_DATA_RE. No default is passed to the search.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the object passed to the first ytcfg.set(...) call found in webpage.

        Falls back to parsing '{}' when no call is found; parsing is non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
334
0c148415 335
360e1ca5 336class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 337 IE_DESC = 'YouTube.com'
cb7dfeea 338 _VALID_URL = r"""(?x)^
c5e8d7af 339 (
edb53e2d 340 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 341 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 342 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 343 (?:www\.)?pwnyoutube\.com/|
8b561bfc 344 (?:www\.)?hooktube\.com/|
f7000f3a 345 (?:www\.)?yourepeat\.com/|
e69ae5b9 346 tube\.majestyc\.net/|
ba036333 347 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 348 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 349 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 350 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 351 (?:www\.)?invidious\.kabi\.tk/|
ba036333 352 (?:www\.)?invidious\.13ad\.de/|
791d2e81 353 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 354 (?:www\.)?invidious\.zapashcanon\.fr/|
355 (?:www\.)?invidious\.kavin\.rocks/|
356 (?:www\.)?invidious\.tube/|
357 (?:www\.)?invidiou\.site/|
358 (?:www\.)?invidious\.site/|
359 (?:www\.)?invidious\.xyz/|
494d664e 360 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 361 (?:www\.)?invidious\.drycat\.fr/|
ba036333 362 (?:www\.)?tube\.poal\.co/|
29f7c58a 363 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 364 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 365 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 366 (?:www\.)?yewtu\.be/|
494d664e 367 (?:www\.)?yt\.elukerio\.org/|
894b3826 368 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 369 (?:www\.)?invidious\.ggc-project\.de/|
370 (?:www\.)?yt\.maisputain\.ovh/|
371 (?:www\.)?invidious\.13ad\.de/|
372 (?:www\.)?invidious\.toot\.koeln/|
373 (?:www\.)?invidious\.fdn\.fr/|
374 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 375 (?:www\.)?kgg2m7yk5aybusll\.onion/|
376 (?:www\.)?qklhadlycap4cnod\.onion/|
377 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
378 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
379 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
380 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 381 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 382 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 383 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
384 (?:.*?\#/)? # handle anchor (#/) redirect urls
385 (?: # the various things that can precede the ID:
ac7553d0 386 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 387 |(?: # or the v= param in all its forms
f7000f3a 388 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 389 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 390 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
391 v=
392 )
f4b05232 393 ))
cbaed4bb
S
394 |(?:
395 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
396 vid\.plus| # or vid.plus/xxxx
397 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 398 )/
edb53e2d 399 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 400 )
c5e8d7af 401 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 402 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
403 (?!.*?\blist=
404 (?:
405 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
406 WL # WL are handled by the watch later IE
407 )
408 )
c5e8d7af 409 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 410 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 411 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
412 _PLAYER_INFO_RE = (
413 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
414 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
415 )
2c62dc26 416 _formats = {
c2d3cb4c 417 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
418 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
419 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
420 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
421 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
422 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
423 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
424 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 425 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 426 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
427 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
429 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
430 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
431 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 432 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 433 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
434 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 435
436
437 # 3D videos
c2d3cb4c 438 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
439 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
440 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
441 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 442 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
443 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
444 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 445
96fb5605 446 # Apple HTTP Live Streaming
11f12195 447 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 448 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
449 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
450 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
451 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
452 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 453 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
455
456 # DASH mp4 video
d23028a8
S
457 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
458 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
459 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
460 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
461 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 462 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
463 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
467 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
468 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 469
f6f1fc92 470 # Dash mp4 audio
d23028a8
S
471 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
472 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
473 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
474 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
475 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
476 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
477 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
478
479 # Dash webm
d23028a8
S
480 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
481 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
482 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
483 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
484 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
485 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
487 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
488 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
490 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
491 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
492 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 495 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
496 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
498 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
499 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
500 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
502
503 # Dash webm audio
d23028a8
S
504 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
505 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 506
0857baad 507 # Dash webm audio with opus inside
d23028a8
S
508 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
509 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
510 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 511
ce6b9a2d
PH
512 # RTMP (unnamed)
513 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
514
515 # av01 video only formats sometimes served with "unknown" codecs
516 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
517 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
518 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
519 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 520 }
29f7c58a 521 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 522
fd5c4aab
S
523 _GEO_BYPASS = False
524
78caa52a 525 IE_NAME = 'youtube'
2eb88d95
PH
526 _TESTS = [
527 {
2d3d2997 528 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
529 'info_dict': {
530 'id': 'BaW_jenozKc',
531 'ext': 'mp4',
3867038a 532 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
533 'uploader': 'Philipp Hagemeister',
534 'uploader_id': 'phihag',
ec85ded8 535 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
536 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
537 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 538 'upload_date': '20121002',
3867038a 539 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 540 'categories': ['Science & Technology'],
3867038a 541 'tags': ['youtube-dl'],
556dbe7f 542 'duration': 10,
dbdaaa23 543 'view_count': int,
3e7c1224
PH
544 'like_count': int,
545 'dislike_count': int,
7c80519c 546 'start_time': 1,
297a564b 547 'end_time': 9,
2eb88d95 548 }
0e853ca4 549 },
fccd3771 550 {
4bc3a23e
PH
551 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
552 'note': 'Embed-only video (#1746)',
553 'info_dict': {
554 'id': 'yZIXLfi8CZQ',
555 'ext': 'mp4',
556 'upload_date': '20120608',
557 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
558 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
559 'uploader': 'SET India',
94bfcd23 560 'uploader_id': 'setindia',
ec85ded8 561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 562 'age_limit': 18,
fccd3771
PH
563 }
564 },
11b56058 565 {
8bdd16b4 566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
567 'note': 'Use the first video ID in the URL',
568 'info_dict': {
569 'id': 'BaW_jenozKc',
570 'ext': 'mp4',
3867038a 571 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
ec85ded8 574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 575 'upload_date': '20121002',
3867038a 576 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 577 'categories': ['Science & Technology'],
3867038a 578 'tags': ['youtube-dl'],
556dbe7f 579 'duration': 10,
dbdaaa23 580 'view_count': int,
11b56058
PM
581 'like_count': int,
582 'dislike_count': int,
34a7de29
S
583 },
584 'params': {
585 'skip_download': True,
586 },
11b56058 587 },
dd27fd17 588 {
2d3d2997 589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
590 'note': '256k DASH audio (format 141) via DASH manifest',
591 'info_dict': {
592 'id': 'a9LDPn-MO4I',
593 'ext': 'm4a',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
ec85ded8 596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
597 'description': '',
598 'uploader': '8KVIDEO',
599 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 600 },
4bc3a23e
PH
601 'params': {
602 'youtube_include_dash_manifest': True,
603 'format': '141',
4919603f 604 },
de3c7fe0 605 'skip': 'format 141 not served anymore',
dd27fd17 606 },
8bdd16b4 607 # DASH manifest with encrypted signature
608 {
609 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
610 'info_dict': {
611 'id': 'IB3lcPjvWLA',
612 'ext': 'm4a',
613 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
614 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
615 'duration': 244,
616 'uploader': 'AfrojackVEVO',
617 'uploader_id': 'AfrojackVEVO',
618 'upload_date': '20131011',
619 },
620 'params': {
621 'youtube_include_dash_manifest': True,
622 'format': '141/bestaudio[ext=m4a]',
623 },
624 },
aa79ac0c
PH
625 # Controversy video
626 {
627 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
628 'info_dict': {
629 'id': 'T4XJQO3qol8',
630 'ext': 'mp4',
556dbe7f 631 'duration': 219,
aa79ac0c 632 'upload_date': '20100909',
4fe54c12 633 'uploader': 'Amazing Atheist',
aa79ac0c 634 'uploader_id': 'TheAmazingAtheist',
ec85ded8 635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
636 'title': 'Burning Everyone\'s Koran',
637 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
638 }
c522adb1 639 },
dd2d55f1 640 # Normal age-gate video (embed allowed)
c522adb1 641 {
2d3d2997 642 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
643 'info_dict': {
644 'id': 'HtVdAasjOgU',
645 'ext': 'mp4',
646 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 647 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 648 'duration': 142,
c522adb1
JMF
649 'uploader': 'The Witcher',
650 'uploader_id': 'WitcherGame',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 652 'upload_date': '20140605',
34952f09 653 'age_limit': 18,
c522adb1
JMF
654 },
655 },
8bdd16b4 656 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
657 # YouTube Red ad is not captured for creator
658 {
659 'url': '__2ABJjxzNo',
660 'info_dict': {
661 'id': '__2ABJjxzNo',
662 'ext': 'mp4',
663 'duration': 266,
664 'upload_date': '20100430',
665 'uploader_id': 'deadmau5',
666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
667 'creator': 'Dada Life, deadmau5',
668 'description': 'md5:12c56784b8032162bb936a5f76d55360',
669 'uploader': 'deadmau5',
670 'title': 'Deadmau5 - Some Chords (HD)',
671 'alt_title': 'This Machine Kills Some Chords',
672 },
673 'expected_warnings': [
674 'DASH manifest missing',
675 ]
676 },
067aa17e 677 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
678 {
679 'url': 'lqQg6PlCWgI',
680 'info_dict': {
681 'id': 'lqQg6PlCWgI',
682 'ext': 'mp4',
556dbe7f 683 'duration': 6085,
90227264 684 'upload_date': '20150827',
cbe2bd91 685 'uploader_id': 'olympic',
ec85ded8 686 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 687 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 688 'uploader': 'Olympic',
cbe2bd91
PH
689 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
690 },
691 'params': {
692 'skip_download': 'requires avconv',
e52a40ab 693 }
cbe2bd91 694 },
6271f1ca
PH
695 # Non-square pixels
696 {
697 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
698 'info_dict': {
699 'id': '_b-2C3KPAM0',
700 'ext': 'mp4',
701 'stretched_ratio': 16 / 9.,
556dbe7f 702 'duration': 85,
6271f1ca
PH
703 'upload_date': '20110310',
704 'uploader_id': 'AllenMeow',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 706 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 707 'uploader': '孫ᄋᄅ',
6271f1ca
PH
708 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
709 },
06b491eb
S
710 },
711 # url_encoded_fmt_stream_map is empty string
712 {
713 'url': 'qEJwOuvDf7I',
714 'info_dict': {
715 'id': 'qEJwOuvDf7I',
f57b7835 716 'ext': 'webm',
06b491eb
S
717 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
718 'description': '',
719 'upload_date': '20150404',
720 'uploader_id': 'spbelect',
721 'uploader': 'Наблюдатели Петербурга',
722 },
723 'params': {
724 'skip_download': 'requires avconv',
e323cf3f
S
725 },
726 'skip': 'This live event has ended.',
06b491eb 727 },
067aa17e 728 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
729 {
730 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
731 'info_dict': {
732 'id': 'FIl7x6_3R5Y',
eb6793ba 733 'ext': 'webm',
da77d856
S
734 'title': 'md5:7b81415841e02ecd4313668cde88737a',
735 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 736 'duration': 220,
da77d856
S
737 'upload_date': '20150625',
738 'uploader_id': 'dorappi2000',
ec85ded8 739 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 740 'uploader': 'dorappi2000',
eb6793ba 741 'formats': 'mincount:31',
da77d856 742 },
eb6793ba 743 'skip': 'not actual anymore',
2ee8f5d8 744 },
8a1a26ce
YCH
745 # DASH manifest with segment_list
746 {
747 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
748 'md5': '8ce563a1d667b599d21064e982ab9e31',
749 'info_dict': {
750 'id': 'CsmdDsKjzN8',
751 'ext': 'mp4',
17ee98e1 752 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
753 'uploader': 'Airtek',
754 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
755 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
756 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
757 },
758 'params': {
759 'youtube_include_dash_manifest': True,
760 'format': '135', # bestvideo
be49068d
S
761 },
762 'skip': 'This live event has ended.',
2ee8f5d8 763 },
cf7e015f
S
764 {
765 # Multifeed videos (multiple cameras), URL is for Main Camera
766 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
767 'info_dict': {
768 'id': 'jqWvoWXjCVs',
769 'title': 'teamPGP: Rocket League Noob Stream',
770 'description': 'md5:dc7872fb300e143831327f1bae3af010',
771 },
772 'playlist': [{
773 'info_dict': {
774 'id': 'jqWvoWXjCVs',
775 'ext': 'mp4',
776 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
777 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 778 'duration': 7335,
cf7e015f
S
779 'upload_date': '20150721',
780 'uploader': 'Beer Games Beer',
781 'uploader_id': 'beergamesbeer',
ec85ded8 782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 783 'license': 'Standard YouTube License',
cf7e015f
S
784 },
785 }, {
786 'info_dict': {
787 'id': '6h8e8xoXJzg',
788 'ext': 'mp4',
789 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
790 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 791 'duration': 7337,
cf7e015f
S
792 'upload_date': '20150721',
793 'uploader': 'Beer Games Beer',
794 'uploader_id': 'beergamesbeer',
ec85ded8 795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 796 'license': 'Standard YouTube License',
cf7e015f
S
797 },
798 }, {
799 'info_dict': {
800 'id': 'PUOgX5z9xZw',
801 'ext': 'mp4',
802 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
803 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 804 'duration': 7337,
cf7e015f
S
805 'upload_date': '20150721',
806 'uploader': 'Beer Games Beer',
807 'uploader_id': 'beergamesbeer',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 809 'license': 'Standard YouTube License',
cf7e015f
S
810 },
811 }, {
812 'info_dict': {
813 'id': 'teuwxikvS5k',
814 'ext': 'mp4',
815 'title': 'teamPGP: Rocket League Noob Stream (zim)',
816 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 817 'duration': 7334,
cf7e015f
S
818 'upload_date': '20150721',
819 'uploader': 'Beer Games Beer',
820 'uploader_id': 'beergamesbeer',
ec85ded8 821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 822 'license': 'Standard YouTube License',
cf7e015f
S
823 },
824 }],
825 'params': {
826 'skip_download': True,
827 },
4fe54c12 828 'skip': 'This video is not available.',
cbaed4bb 829 },
f9f49d87 830 {
067aa17e 831 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
832 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
833 'info_dict': {
834 'id': 'gVfLd0zydlo',
835 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
836 },
837 'playlist_count': 2,
be49068d 838 'skip': 'Not multifeed anymore',
f9f49d87 839 },
cbaed4bb 840 {
2d3d2997 841 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 842 'only_matching': True,
0e49d9a6 843 },
6d4fc66b 844 {
2d3d2997 845 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
846 'only_matching': True,
847 },
0e49d9a6 848 {
067aa17e 849 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 850 # Also tests cut-off URL expansion in video description (see
067aa17e
S
851 # https://github.com/ytdl-org/youtube-dl/issues/1892,
852 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
853 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
854 'info_dict': {
855 'id': 'lsguqyKfVQg',
856 'ext': 'mp4',
857 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 858 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 859 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 860 'duration': 133,
0e49d9a6
LL
861 'upload_date': '20151119',
862 'uploader_id': 'IronSoulElf',
ec85ded8 863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 864 'uploader': 'IronSoulElf',
eb6793ba
S
865 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
866 'track': 'Dark Walk - Position Music',
867 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 868 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
869 },
870 'params': {
871 'skip_download': True,
872 },
873 },
61f92af1 874 {
067aa17e 875 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
876 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
877 'only_matching': True,
878 },
313dfc45
LL
879 {
880 # Video with yt:stretch=17:0
881 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
882 'info_dict': {
883 'id': 'Q39EVAstoRM',
884 'ext': 'mp4',
885 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
886 'description': 'md5:ee18a25c350637c8faff806845bddee9',
887 'upload_date': '20151107',
888 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
889 'uploader': 'CH GAMER DROID',
890 },
891 'params': {
892 'skip_download': True,
893 },
be49068d 894 'skip': 'This video does not exist.',
313dfc45 895 },
7caf9830
S
896 {
897 # Video licensed under Creative Commons
898 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
899 'info_dict': {
900 'id': 'M4gD1WSo5mA',
901 'ext': 'mp4',
902 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
903 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 904 'duration': 721,
7caf9830
S
905 'upload_date': '20150127',
906 'uploader_id': 'BerkmanCenter',
ec85ded8 907 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 908 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
909 'license': 'Creative Commons Attribution license (reuse allowed)',
910 },
911 'params': {
912 'skip_download': True,
913 },
914 },
fd050249
S
915 {
916 # Channel-like uploader_url
917 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
918 'info_dict': {
919 'id': 'eQcmzGIKrzg',
920 'ext': 'mp4',
921 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
922 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 923 'duration': 4060,
fd050249 924 'upload_date': '20151119',
eb6793ba 925 'uploader': 'Bernie Sanders',
fd050249 926 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
928 'license': 'Creative Commons Attribution license (reuse allowed)',
929 },
930 'params': {
931 'skip_download': True,
932 },
933 },
040ac686
S
934 {
935 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
936 'only_matching': True,
7f29cf54
S
937 },
938 {
067aa17e 939 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
940 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
941 'only_matching': True,
6496ccb4
S
942 },
943 {
944 # Rental video preview
945 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
946 'info_dict': {
947 'id': 'uGpuVWrhIzE',
948 'ext': 'mp4',
949 'title': 'Piku - Trailer',
950 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
951 'upload_date': '20150811',
952 'uploader': 'FlixMatrix',
953 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
955 'license': 'Standard YouTube License',
956 },
957 'params': {
958 'skip_download': True,
959 },
eb6793ba 960 'skip': 'This video is not available.',
022a5d66 961 },
12afdc2a
S
962 {
963 # YouTube Red video with episode data
964 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
965 'info_dict': {
966 'id': 'iqKdEhx-dD4',
967 'ext': 'mp4',
968 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 969 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 970 'duration': 2085,
12afdc2a
S
971 'upload_date': '20170118',
972 'uploader': 'Vsauce',
973 'uploader_id': 'Vsauce',
974 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
975 'series': 'Mind Field',
976 'season_number': 1,
977 'episode_number': 1,
978 },
979 'params': {
980 'skip_download': True,
981 },
982 'expected_warnings': [
983 'Skipping DASH manifest',
984 ],
985 },
c7121fa7
S
986 {
987 # The following content has been identified by the YouTube community
988 # as inappropriate or offensive to some audiences.
989 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
990 'info_dict': {
991 'id': '6SJNVb0GnPI',
992 'ext': 'mp4',
993 'title': 'Race Differences in Intelligence',
994 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
995 'duration': 965,
996 'upload_date': '20140124',
997 'uploader': 'New Century Foundation',
998 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 },
022a5d66
S
1005 {
1006 # itag 212
1007 'url': '1t24XAntNCY',
1008 'only_matching': True,
fd5c4aab
S
1009 },
1010 {
1011 # geo restricted to JP
1012 'url': 'sJL6WA-aGkQ',
1013 'only_matching': True,
1014 },
cd5a74a2
S
1015 {
1016 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1017 'only_matching': True,
1018 },
825cd268
RA
1019 {
1020 # DRM protected
1021 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1022 'only_matching': True,
4fe54c12
S
1023 },
1024 {
1025 # Video with unsupported adaptive stream type formats
1026 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1027 'info_dict': {
1028 'id': 'Z4Vy8R84T1U',
1029 'ext': 'mp4',
1030 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1031 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1032 'duration': 433,
1033 'upload_date': '20130923',
1034 'uploader': 'Amelia Putri Harwita',
1035 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1037 'formats': 'maxcount:10',
1038 },
1039 'params': {
1040 'skip_download': True,
1041 'youtube_include_dash_manifest': False,
1042 },
5429d6a9 1043 'skip': 'not actual anymore',
5caabd3c 1044 },
1045 {
822b9d9c 1046 # Youtube Music Auto-generated description
5caabd3c 1047 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1048 'info_dict': {
1049 'id': 'MgNrAu2pzNs',
1050 'ext': 'mp4',
1051 'title': 'Voyeur Girl',
1052 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1053 'upload_date': '20190312',
5429d6a9
S
1054 'uploader': 'Stephen - Topic',
1055 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1056 'artist': 'Stephen',
1057 'track': 'Voyeur Girl',
1058 'album': 'it\'s too much love to know my dear',
1059 'release_date': '20190313',
1060 'release_year': 2019,
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
66b48727
RA
1066 {
1067 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1068 'only_matching': True,
1069 },
011e75e6
S
1070 {
1071 # invalid -> valid video id redirection
1072 'url': 'DJztXj2GPfl',
1073 'info_dict': {
1074 'id': 'DJztXj2GPfk',
1075 'ext': 'mp4',
1076 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1077 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1078 'upload_date': '20090125',
1079 'uploader': 'Prochorowka',
1080 'uploader_id': 'Prochorowka',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1082 'artist': 'Panjabi MC',
1083 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1084 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
ea74e00b
DP
1089 },
1090 {
1091 # empty description results in an empty string
1092 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1093 'info_dict': {
1094 'id': 'x41yOUIvK2k',
1095 'ext': 'mp4',
1096 'title': 'IMG 3456',
1097 'description': '',
1098 'upload_date': '20170613',
1099 'uploader_id': 'ElevageOrVert',
1100 'uploader': 'ElevageOrVert',
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 },
a0566bbf 1106 {
29f7c58a 1107 # with '};' inside yt initial data (see [1])
1108 # see [2] for an example with '};' inside ytInitialPlayerResponse
1109 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1110 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1111 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1112 'info_dict': {
1113 'id': 'CHqg6qOn4no',
1114 'ext': 'mp4',
1115 'title': 'Part 77 Sort a list of simple types in c#',
1116 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1117 'upload_date': '20130831',
1118 'uploader_id': 'kudvenkat',
1119 'uploader': 'kudvenkat',
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 },
29f7c58a 1125 {
1126 # another example of '};' in ytInitialData
1127 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1128 'only_matching': True,
1129 },
1130 {
1131 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1132 'only_matching': True,
1133 },
2eb88d95
PH
1134 ]
1135
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature(): maps a
    # (player_url, signature layout) key to a signature function.
    self._player_cache = dict()
e0df6211 1139
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Report the attempt to download the video info webpage of *video_id*."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1143
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Report the attempt to extract information for *video_id*."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1147
def report_unavailable_format(self, video_id, format):
    """Report that the requested *format* is not available for *video_id*."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
1151
def report_rtmp_download(self):
    """Indicate that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1155
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string describing the layout of a signature.

    The signature is split on '.', and the lengths of the resulting parts
    are joined with '.' again (e.g. a 3-char part followed by a 5-char part
    gives '3.5').
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1159
e40c758c
S
1160 @classmethod
1161 def _extract_player_info(cls, player_url):
1162 for player_re in cls._PLAYER_INFO_RE:
1163 id_m = re.search(player_re, player_url)
1164 if id_m:
1165 break
1166 else:
c081b35c 1167 raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1168 return id_m.group('ext'), id_m.group('id')
1169
1170 def _extract_signature_function(self, video_id, player_url, example_sig):
1171 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1172
c4417ddb 1173 # Read from filesystem cache
60064c53
PH
1174 func_id = '%s_%s_%s' % (
1175 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1176 assert os.path.basename(func_id) == func_id
a0e07d31 1177
69ea8ca4 1178 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1179 if cache_spec is not None:
78caa52a 1180 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1181
6d1a55a5
PH
1182 download_note = (
1183 'Downloading player %s' % player_url
1184 if self._downloader.params.get('verbose') else
1185 'Downloading %s player %s' % (player_type, player_id)
1186 )
e0df6211
PH
1187 if player_type == 'js':
1188 code = self._download_webpage(
1189 player_url, video_id,
6d1a55a5 1190 note=download_note,
69ea8ca4 1191 errnote='Download of %s failed' % player_url)
83799698 1192 res = self._parse_sig_js(code)
c4417ddb 1193 elif player_type == 'swf':
e0df6211
PH
1194 urlh = self._request_webpage(
1195 player_url, video_id,
6d1a55a5 1196 note=download_note,
69ea8ca4 1197 errnote='Download of %s failed' % player_url)
e0df6211 1198 code = urlh.read()
83799698 1199 res = self._parse_sig_swf(code)
e0df6211
PH
1200 else:
1201 assert False, 'Invalid player type %r' % player_type
1202
785521bf
PH
1203 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1204 cache_res = res(test_string)
1205 cache_spec = [ord(c) for c in cache_res]
83799698 1206
69ea8ca4 1207 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1208 return res
1209
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    func is probed with a string whose i-th character has code point i (as
    long as example_sig); the resulting permutation is rendered as a sum of
    ``s[...]`` index and slice expressions and shown via to_screen().
    """
    def gen_sig_code(idxs):
        # Yield 's[i]' / 's[a:b:c]' snippets covering idxs in order; runs of
        # consecutive indices (step +1 or -1) are merged into one slice.
        # NOTE(review): with fewer than two indices the loop body never runs
        # and ``i`` below is unbound.
        def _genslice(start, end, step):
            stop = end + step
            starts = str(start) if start != 0 else ''
            ends = ':' if stop < 0 else ':%d' % stop
            steps = ':%d' % step if step != 1 else ''
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Placeholder to keep pyflakes quiet - start is always assigned
        # together with step before it is read.
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is not None:
                if delta == step:
                    # Still inside the current run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if delta in (-1, 1):
                # A new run begins at prev
                step = delta
                start = prev
                continue
            yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1248
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return the signature function found in the JS player code *jscode*.

    The name of the initial signature function is located with the regexes
    below (tried in order), the function is extracted with JSInterpreter,
    and a callable taking a single string is returned.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def decipher(s):
        # The extracted function expects its arguments as a list.
        return initial_function([s])

    return decipher
1269
def _parse_sig_swf(self, file_contents):
    """Return the signature function found in SWF player *file_contents*.

    The ``decipher`` function of the ``SignatureDecipher`` class is
    extracted with SWFInterpreter and wrapped in a callable taking a single
    string.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function expects its arguments as a list.
        return initial_function([s])

    return decipher
1276
83799698 1277 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1278 """Turn the encrypted s field into a working signature"""
6b37f0be 1279
c8bf86d5 1280 if player_url is None:
69ea8ca4 1281 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1282
69ea8ca4 1283 if player_url.startswith('//'):
78caa52a 1284 player_url = 'https:' + player_url
3c90cc8b
S
1285 elif not re.match(r'https?://', player_url):
1286 player_url = compat_urlparse.urljoin(
1287 'https://www.youtube.com', player_url)
c8bf86d5 1288 try:
62af3a0e 1289 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1290 if player_id not in self._player_cache:
1291 func = self._extract_signature_function(
60064c53 1292 video_id, player_url, s
c8bf86d5
PH
1293 )
1294 self._player_cache[player_id] = func
1295 func = self._player_cache[player_id]
1296 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1297 self._print_sig_code(func, s)
c8bf86d5
PH
1298 return func(s)
1299 except Exception as e:
1300 tb = traceback.format_exc()
1301 raise ExtractorError(
78caa52a 1302 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1303
f96f5dda 1304 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
de7f3446 1305 try:
60e47a26 1306 subs_doc = self._download_xml(
38c2e5b8 1307 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1308 video_id, note=False)
1309 except ExtractorError as err:
9b9c5355 1310 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1311 return {}
de7f3446
JMF
1312
1313 sub_lang_list = {}
60e47a26
JMF
1314 for track in subs_doc.findall('track'):
1315 lang = track.attrib['lang_code']
7e660ac1
LD
1316 if lang in sub_lang_list:
1317 continue
360e1ca5 1318 sub_formats = []
23d17e4b 1319 for ext in self._SUBTITLE_FORMATS:
15707c7e 1320 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1321 'lang': lang,
1322 'v': video_id,
1323 'fmt': ext,
1324 'name': track.attrib['name'].encode('utf-8'),
1325 })
1326 sub_formats.append({
1327 'url': 'https://www.youtube.com/api/timedtext?' + params,
1328 'ext': ext,
1329 })
1330 sub_lang_list[lang] = sub_formats
9f448fcb 1331 if has_live_chat_replay:
321bf820 1332 sub_lang_list['live_chat'] = [
1333 {
1334 'video_id': video_id,
1335 'ext': 'json',
1336 'protocol': 'youtube_live_chat_replay',
1337 },
9f448fcb 1338 ]
de7f3446 1339 if not sub_lang_list:
69ea8ca4 1340 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1341 return {}
1342 return sub_lang_list
1343
a72778d3
S
1344 def _get_ytplayer_config(self, video_id, webpage):
1345 patterns = (
526b3b07
S
1346 # User data may contain arbitrary character sequences that may affect
1347 # JSON extraction with regex, e.g. when '};' is contained the second
1348 # regex won't capture the whole JSON. Yet working around by trying more
1349 # concrete regex first keeping in mind proper quoted string handling
1350 # to be implemented in future that will replace this workaround (see
067aa17e
S
1351 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1352 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1353 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1354 r';ytplayer\.config\s*=\s*({.+?});',
1355 )
1356 config = self._search_regex(
1357 patterns, webpage, 'ytplayer.config', default=None)
1358 if config:
1359 return self._parse_json(
1360 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1361
29f7c58a 1362 def _get_automatic_captions(self, video_id, player_response, player_config):
de7f3446
JMF
1363 """We need the webpage for getting the captions url, pass it as an
1364 argument to speed up the process."""
69ea8ca4 1365 self.to_screen('%s: Looking for automatic captions' % video_id)
78caa52a 1366 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
29f7c58a 1367 if not (player_response or player_config):
de7f3446
JMF
1368 self._downloader.report_warning(err_msg)
1369 return {}
de7f3446 1370 try:
29f7c58a 1371 args = player_config.get('args') if player_config else {}
8bdd16b4 1372 caption_url = args.get('ttsurl')
1373 if caption_url:
b78b292f
S
1374 timestamp = args['timestamp']
1375 # We get the available subtitles
15707c7e 1376 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1377 'type': 'list',
1378 'tlangs': 1,
1379 'asrs': 1,
1380 })
1381 list_url = caption_url + '&' + list_params
1382 caption_list = self._download_xml(list_url, video_id)
1383 original_lang_node = caption_list.find('track')
1384 if original_lang_node is None:
1385 self._downloader.report_warning('Video doesn\'t have automatic captions')
1386 return {}
1387 original_lang = original_lang_node.attrib['lang_code']
1388 caption_kind = original_lang_node.attrib.get('kind', '')
1389
1390 sub_lang_list = {}
1391 for lang_node in caption_list.findall('target'):
1392 sub_lang = lang_node.attrib['lang_code']
1393 sub_formats = []
1394 for ext in self._SUBTITLE_FORMATS:
15707c7e 1395 params = compat_urllib_parse_urlencode({
b78b292f
S
1396 'lang': original_lang,
1397 'tlang': sub_lang,
1398 'fmt': ext,
1399 'ts': timestamp,
1400 'kind': caption_kind,
1401 })
1402 sub_formats.append({
1403 'url': caption_url + '&' + params,
1404 'ext': ext,
1405 })
1406 sub_lang_list[sub_lang] = sub_formats
1407 return sub_lang_list
1408
ddbb4c5c
S
1409 def make_captions(sub_url, sub_langs):
1410 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1411 caption_qs = compat_parse_qs(parsed_sub_url.query)
1412 captions = {}
1413 for sub_lang in sub_langs:
1414 sub_formats = []
1415 for ext in self._SUBTITLE_FORMATS:
1416 caption_qs.update({
1417 'tlang': [sub_lang],
1418 'fmt': [ext],
1419 })
1420 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1421 query=compat_urllib_parse_urlencode(caption_qs, True)))
1422 sub_formats.append({
1423 'url': sub_url,
1424 'ext': ext,
1425 })
1426 captions[sub_lang] = sub_formats
1427 return captions
1428
1429 # New captions format as of 22.06.2017
29f7c58a 1430 if player_response:
1431 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1432 base_url = renderer['captionTracks'][0]['baseUrl']
1433 sub_lang_list = []
1434 for lang in renderer['translationLanguages']:
1435 lang_code = lang.get('languageCode')
1436 if lang_code:
1437 sub_lang_list.append(lang_code)
1438 return make_captions(base_url, sub_lang_list)
59c5fa91 1439
8bdd16b4 1440 # Some videos don't provide ttsurl but rather caption_tracks and
1441 # caption_translation_languages (e.g. 20LmZk1hakA)
1442 # Does not used anymore as of 22.06.2017
1443 caption_tracks = args['caption_tracks']
1444 caption_translation_languages = args['caption_translation_languages']
1445 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1446 sub_lang_list = []
1447 for lang in caption_translation_languages.split(','):
1448 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1449 sub_lang = lang_qs.get('lc', [None])[0]
1450 if sub_lang:
1451 sub_lang_list.append(sub_lang)
1452 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1453 # An extractor error can be raise by the download process if there are
1454 # no automatic captions but there are subtitles
ddbb4c5c 1455 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1456 self._downloader.report_warning(err_msg)
1457 return {}
1458
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video is marked watched.

    The URL is taken from *player_response* (playbackTracking) or, failing
    that, from *video_info*; nothing happens when neither provides a valid
    URL.  The request itself is non-fatal.
    """
    playback_url = url_or_none(
        try_get(
            player_response,
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
        or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
        return

    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    qs['ver'] = ['2']
    qs['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1484
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage*.

    Collected, in this order: URLs of embedded players, ids from lazyYT
    elements, and ids from the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE(review): the ``embedSWF\(?:\s*`` alternative reads like a
    # mistyped ``embedSWF\((?:\s*)``; kept as is - confirm upstream.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall() yields one tuple of groups per match; the id is the last.
    for groups in re.findall(importer_re, webpage):
        entries.append(groups[-1])

    return entries
1516
@staticmethod
def _extract_url(webpage):
    """Return the first entry reported by ``YoutubeIE._extract_urls`` for
    *webpage*, or None when nothing was found."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1521
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched
    (in verbose mode) against *url*.

    Raises ExtractorError when *url* does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1529
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build the chapter list from the initial-data JSON found in *webpage*.

    Returns None when there is no webpage, no usable initial data or no
    chapter list in it; otherwise a list of dicts with ``start_time``,
    ``end_time`` and ``title``. A chapter ends where the next one starts;
    the last chapter ends at *duration*. Chapters whose start or end
    cannot be determined are left out.
    """
    if not webpage:
        return
    initial_data = self._extract_yt_initial_data(video_id, webpage)
    if not initial_data or not isinstance(initial_data, dict):
        return
    raw_chapters = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(entry):
        # Start offset is stored in milliseconds; convert to seconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, entry in enumerate(raw_chapters):
        begin = start_of(entry)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_of(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        chapter_title = try_get(
            entry, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': chapter_title,
        })
    return result
1574
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Derive chapters from seekTo timestamp links in an HTML description.

    *description* is the raw HTML description; each ``<br />``-separated
    line containing a ``yt.www.watch.player.seekTo`` link is treated as a
    chapter line. *duration* is the video length in seconds and may be
    None.

    Returns None when there is no description or no chapter line,
    otherwise a list of dicts with ``start_time``, ``end_time`` and
    ``title``. A chapter ends where the next chapter line starts; the
    last one ends at *duration*. When *duration* is known, end times are
    clamped to it and processing stops at the first chapter starting
    beyond it. When *duration* is None those checks are skipped (they
    would raise TypeError on Python 3) and the last chapter, having no
    end time, is omitted.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    has_duration = duration is not None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        if has_duration and start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if has_duration and end_time > duration:
            end_time = duration
        # Timestamps going backwards mean these are not real chapters
        if start_time > end_time:
            break
        # Drop the timestamp link itself and tidy up the remaining text
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1609
84213ea8
S
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return chapters from the page's JSON data, falling back to the
    description-based extraction when the JSON yields nothing."""
    chapters = self._extract_chapters_from_json(webpage, video_id, duration)
    if chapters:
        return chapters
    return self._extract_chapters_from_description(description, duration)
1613
c5e8d7af 1614 def _real_extract(self, url):
cf7e015f
S
1615 url, smuggled_data = unsmuggle_url(url, {})
1616
7e8c0af0 1617 proto = (
78caa52a
PH
1618 'http' if self._downloader.params.get('prefer_insecure', False)
1619 else 'https')
7e8c0af0 1620
7c80519c 1621 start_time = None
297a564b 1622 end_time = None
7c80519c
JMF
1623 parsed_url = compat_urllib_parse_urlparse(url)
1624 for component in [parsed_url.fragment, parsed_url.query]:
1625 query = compat_parse_qs(component)
297a564b 1626 if start_time is None and 't' in query:
7c80519c 1627 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1628 if start_time is None and 'start' in query:
1629 start_time = parse_duration(query['start'][0])
297a564b
JMF
1630 if end_time is None and 'end' in query:
1631 end_time = parse_duration(query['end'][0])
7c80519c 1632
c5e8d7af
PH
1633 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1634 mobj = re.search(self._NEXT_URL_RE, url)
1635 if mobj:
7fd002c0 1636 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1637 video_id = self.extract_id(url)
c5e8d7af
PH
1638
1639 # Get video webpage
aa79ac0c 1640 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1641 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1642
1643 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1644 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1645
1646 # Attempt to extract SWF player URL
e0df6211 1647 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1648 if mobj is not None:
1649 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1650 else:
1651 player_url = None
1652
d8d24a92
S
1653 dash_mpds = []
1654
1655 def add_dash_mpd(video_info):
1656 dash_mpd = video_info.get('dashmpd')
1657 if dash_mpd and dash_mpd[0] not in dash_mpds:
1658 dash_mpds.append(dash_mpd[0])
1659
561b456e
S
1660 def add_dash_mpd_pr(pl_response):
1661 dash_mpd = url_or_none(try_get(
1662 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1663 compat_str))
1664 if dash_mpd and dash_mpd not in dash_mpds:
1665 dash_mpds.append(dash_mpd)
1666
c7121fa7
S
1667 is_live = None
1668 view_count = None
1669
1670 def extract_view_count(v_info):
1671 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1672
c2d125d9
S
1673 def extract_player_response(player_response, video_id):
1674 pl_response = str_or_none(player_response)
1675 if not pl_response:
1676 return
1677 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1678 if isinstance(pl_response, dict):
1679 add_dash_mpd_pr(pl_response)
1680 return pl_response
1681
fb2c9277
U
1682 def extract_embedded_config(embed_webpage, video_id):
1683 embedded_config = self._search_regex(
1684 r'setConfig\(({.*})\);',
1685 embed_webpage, 'ytInitialData', default=None)
1686 if embedded_config:
1687 return embedded_config
1688
62d80ba1 1689 video_info = {}
dbdaaa23 1690 player_response = {}
62d80ba1 1691 ytplayer_config = None
1692 embed_webpage = None
dbdaaa23 1693
c5e8d7af 1694 # Get video info
39e7107d
U
1695 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1696 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1697 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1698 age_gate = True
1699 # We simulate the access to the video from www.youtube.com/v/{video_id}
1700 # this can be viewed without login into Youtube
beb95e77
CL
1701 url = proto + '://www.youtube.com/embed/%s' % video_id
1702 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
fb2c9277
U
1703 ext = extract_embedded_config(embed_webpage, video_id)
1704 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1705 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1706 if not playable_in_embed:
1707 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1708 playable_in_embed = ''
1709 else:
1710 playable_in_embed = playable_in_embed.group('playableinEmbed')
1711 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1712 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1713 if playable_in_embed == 'false':
c73baf23
U
1714 '''
1715 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1716 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1717 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1718 '''
1719 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1720 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1721 age_gate = False
1722 # Try looking directly into the video webpage
1723 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1724 if ytplayer_config:
59c5fa91
PO
1725 args = ytplayer_config.get("args")
1726 if args is not None:
1727 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1728 # Convert to the same format returned by compat_parse_qs
1729 video_info = dict((k, [v]) for k, v in args.items())
1730 add_dash_mpd(video_info)
1731 # Rental video is not rented but preview is available (e.g.
1732 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1733 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1734 if not video_info and args.get('ypc_vid'):
1735 return self.url_result(
1736 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1737 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1738 is_live = True
1739 if not player_response:
1740 player_response = extract_player_response(args.get('player_response'), video_id)
1741 elif not player_response:
1742 player_response = ytplayer_config
4bb9c880
U
1743 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1744 add_dash_mpd_pr(player_response)
9d9314cb
U
1745 else:
1746 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1747 else:
1748 data = compat_urllib_parse_urlencode({
1749 'video_id': video_id,
1750 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1751 'sts': self._search_regex(
1752 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1753 })
1754 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1755 try:
1756 video_info_webpage = self._download_webpage(
1757 video_info_url, video_id,
1758 note='Refetching age-gated info webpage',
1759 errnote='unable to download video info webpage')
1760 except ExtractorError:
1761 video_info_webpage = None
1762 if video_info_webpage:
1763 video_info = compat_parse_qs(video_info_webpage)
1764 pl_response = video_info.get('player_response', [None])[0]
1765 player_response = extract_player_response(pl_response, video_id)
1766 add_dash_mpd(video_info)
1767 view_count = extract_view_count(video_info)
c108eb73
JMF
1768 else:
1769 age_gate = False
d8d24a92 1770 # Try looking directly into the video webpage
a72778d3 1771 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
8bdd16b4 1772 if ytplayer_config:
1773 args = ytplayer_config.get('args', {})
4c76aa06 1774 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1775 # Convert to the same format returned by compat_parse_qs
1776 video_info = dict((k, [v]) for k, v in args.items())
1777 add_dash_mpd(video_info)
6496ccb4
S
1778 # Rental video is not rented but preview is available (e.g.
1779 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1780 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1781 if not video_info and args.get('ypc_vid'):
1782 return self.url_result(
1783 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1784 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1785 is_live = True
dbdaaa23 1786 if not player_response:
c2d125d9 1787 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1788 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1789 add_dash_mpd_pr(player_response)
bbb7c3f7 1790
8bdd16b4 1791 if not video_info and not player_response:
1792 player_response = extract_player_response(
1793 self._search_regex(
29f7c58a 1794 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1795 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
8bdd16b4 1796 'initial player response', default='{}'),
1797 video_id)
1798
bbb7c3f7 1799 def extract_unavailable_message():
0add33ab
S
1800 messages = []
1801 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1802 msg = self._html_search_regex(
1803 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1804 video_webpage, 'unavailable %s' % kind, default=None)
1805 if msg:
1806 messages.append(msg)
1807 if messages:
1808 return '\n'.join(messages)
bbb7c3f7 1809
f93abcf1 1810 if not video_info and not player_response:
15be3eb5
RA
1811 unavailable_message = extract_unavailable_message()
1812 if not unavailable_message:
1813 unavailable_message = 'Unable to extract video data'
1814 raise ExtractorError(
1815 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1816
f93abcf1
S
1817 if not isinstance(video_info, dict):
1818 video_info = {}
1819
dbdaaa23
S
1820 video_details = try_get(
1821 player_response, lambda x: x['videoDetails'], dict) or {}
1822
37357d21
S
1823 microformat = try_get(
1824 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1825
8dbf751a
RA
1826 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1827 if not video_title:
cf7e015f
S
1828 self._downloader.report_warning('Unable to extract video title')
1829 video_title = '_'
1830
9cafc3fd 1831 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1832 if video_description:
fa4bc6e7
RA
1833
1834 def replace_url(m):
1835 redir_url = compat_urlparse.urljoin(url, m.group(1))
1836 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1837 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1838 qs = compat_parse_qs(parsed_redir_url.query)
1839 q = qs.get('q')
1840 if q and q[0]:
1841 return q[0]
1842 return redir_url
1843
9cafc3fd 1844 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1845 <a\s+
25cb7a0e 1846 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1847 (?:title|href)="([^"]+)"\s+
25cb7a0e 1848 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1849 class="[^"]*"[^>]*>
23f13e97 1850 [^<]+\.{3}\s*
cf7e015f 1851 </a>
fa4bc6e7 1852 ''', replace_url, video_description)
cf7e015f
S
1853 video_description = clean_html(video_description)
1854 else:
ea74e00b
DP
1855 video_description = video_details.get('shortDescription')
1856 if video_description is None:
1857 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 1858
8fe10494 1859 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1860 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1861 multifeed_metadata_list = try_get(
1862 player_response,
1863 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1864 compat_str) or try_get(
1865 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1866 if multifeed_metadata_list:
1867 entries = []
1868 feed_ids = []
1869 for feed in multifeed_metadata_list.split(','):
1870 # Unquote should take place before split on comma (,) since textual
1871 # fields may contain comma as well (see
067aa17e 1872 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1873 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1874
1875 def feed_entry(name):
1876 return try_get(feed_data, lambda x: x[name][0], compat_str)
1877
1878 feed_id = feed_entry('id')
1879 if not feed_id:
1880 continue
1881 feed_title = feed_entry('title')
1882 title = video_title
1883 if feed_title:
1884 title += ' (%s)' % feed_title
8fe10494
S
1885 entries.append({
1886 '_type': 'url_transparent',
1887 'ie_key': 'Youtube',
1888 'url': smuggle_url(
1889 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1890 {'force_singlefeed': True}),
6b09401b 1891 'title': title,
8fe10494 1892 })
6b09401b 1893 feed_ids.append(feed_id)
8fe10494
S
1894 self.to_screen(
1895 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1896 % (', '.join(feed_ids), video_id))
1897 return self.playlist_result(entries, video_id, video_title, video_description)
1898 else:
1899 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1900
c7121fa7 1901 if view_count is None:
1c9c8de2 1902 view_count = extract_view_count(video_info)
dbdaaa23
S
1903 if view_count is None and video_details:
1904 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
1905 if view_count is None and microformat:
1906 view_count = int_or_none(microformat.get('viewCount'))
1d699755 1907
27019dbb 1908 if is_live is None:
898238e9 1909 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1910
321bf820 1911 has_live_chat_replay = False
f0f76a33 1912 if not is_live:
321bf820 1913 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1914 try:
1915 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1916 has_live_chat_replay = True
f0f76a33 1917 except (KeyError, IndexError, TypeError):
321bf820 1918 pass
1919
c5e8d7af
PH
1920 # Check for "rental" videos
1921 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1922 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1923
c63ca0ee
S
1924 def _extract_filesize(media_url):
1925 return int_or_none(self._search_regex(
1926 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1927
bf1317d2
S
1928 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1929 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1930
c5e8d7af
PH
1931 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1932 self.report_rtmp_download()
dd27fd17
PH
1933 formats = [{
1934 'format_id': '_rtmp',
1935 'protocol': 'rtmp',
1936 'url': video_info['conn'][0],
1937 'player_url': player_url,
1938 }]
bf1317d2 1939 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1940 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1941 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1942 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1943 formats = []
3318832e 1944 formats_spec = {}
82156fdb 1945 fmt_list = video_info.get('fmt_list', [''])[0]
1946 if fmt_list:
1947 for fmt in fmt_list.split(','):
1948 spec = fmt.split('/')
3318832e 1949 if len(spec) > 1:
1950 width_height = spec[1].split('x')
1951 if len(width_height) == 2:
1952 formats_spec[spec[0]] = {
1953 'resolution': spec[1],
1954 'width': int_or_none(width_height[0]),
1955 'height': int_or_none(width_height[1]),
1956 }
bf1317d2
S
1957 for fmt in streaming_formats:
1958 itag = str_or_none(fmt.get('itag'))
1959 if not itag:
201e9eaa 1960 continue
bf1317d2
S
1961 quality = fmt.get('quality')
1962 quality_label = fmt.get('qualityLabel') or quality
1963 formats_spec[itag] = {
1964 'asr': int_or_none(fmt.get('audioSampleRate')),
1965 'filesize': int_or_none(fmt.get('contentLength')),
1966 'format_note': quality_label,
1967 'fps': int_or_none(fmt.get('fps')),
1968 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1969 # bitrate for itag 43 is always 2147483647
1970 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1971 'width': int_or_none(fmt.get('width')),
1972 }
1973
1974 for fmt in streaming_formats:
00eb865b 1975 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
1976 continue
1977 url = url_or_none(fmt.get('url'))
1978
1979 if not url:
fa3db383 1980 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
1981 if not cipher:
1982 continue
1983 url_data = compat_parse_qs(cipher)
1984 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1985 if not url:
1986 continue
1987 else:
1988 cipher = None
1989 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1990
2f483bc1
S
1991 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1992 # Unsupported FORMAT_STREAM_TYPE_OTF
1993 if stream_type == 3:
1994 continue
6449cd80 1995
bf1317d2
S
1996 format_id = fmt.get('itag') or url_data['itag'][0]
1997 if not format_id:
1998 continue
1999 format_id = compat_str(format_id)
a49eccdf 2000
bf1317d2
S
2001 if cipher:
2002 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
8bdd16b4 2003 ASSETS_RE = (
2004 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2005 r'"jsUrl"\s*:\s*("[^"]+")',
2006 r'"assets":.+?"js":\s*("[^"]+")')
bf1317d2
S
2007 jsplayer_url_json = self._search_regex(
2008 ASSETS_RE,
2009 embed_webpage if age_gate else video_webpage,
2010 'JS player URL (1)', default=None)
2011 if not jsplayer_url_json and not age_gate:
2012 # We need the embed website after all
2013 if embed_webpage is None:
2014 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2015 embed_webpage = self._download_webpage(
2016 embed_url, video_id, 'Downloading embed webpage')
2017 jsplayer_url_json = self._search_regex(
2018 ASSETS_RE, embed_webpage, 'JS player URL')
2019
2020 player_url = json.loads(jsplayer_url_json)
cf010131 2021 if player_url is None:
bf1317d2
S
2022 player_url_json = self._search_regex(
2023 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2024 video_webpage, 'age gate player URL')
2025 player_url = json.loads(player_url_json)
2026
2027 if 'sig' in url_data:
2028 url += '&signature=' + url_data['sig'][0]
2029 elif 's' in url_data:
2030 encrypted_sig = url_data['s'][0]
2031
2032 if self._downloader.params.get('verbose'):
2033 if player_url is None:
bf1317d2 2034 player_desc = 'unknown'
cf010131 2035 else:
e40c758c
S
2036 player_type, player_version = self._extract_player_info(player_url)
2037 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2038 parts_sizes = self._signature_cache_id(encrypted_sig)
2039 self.to_screen('{%s} signature length %s, %s' %
2040 (format_id, parts_sizes, player_desc))
2041
2042 signature = self._decrypt_signature(
2043 encrypted_sig, video_id, player_url, age_gate)
2044 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2045 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2046 if 'ratebypass' not in url:
2047 url += '&ratebypass=yes'
c9afb51c 2048
94278f72
YCH
2049 dct = {
2050 'format_id': format_id,
2051 'url': url,
2052 'player_url': player_url,
2053 }
2054 if format_id in self._formats:
2055 dct.update(self._formats[format_id])
3318832e 2056 if format_id in formats_spec:
2057 dct.update(formats_spec[format_id])
94278f72 2058
aabc2be6 2059 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2060 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2061 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2062 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2063 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2064
bf1317d2
S
2065 if width is None:
2066 width = int_or_none(fmt.get('width'))
2067 if height is None:
2068 height = int_or_none(fmt.get('height'))
2069
c63ca0ee
S
2070 filesize = int_or_none(url_data.get(
2071 'clen', [None])[0]) or _extract_filesize(url)
2072
bf1317d2
S
2073 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2074 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2075
4878759f
S
2076 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2077 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2078 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2079
94278f72 2080 more_fields = {
c63ca0ee 2081 'filesize': filesize,
bf1317d2 2082 'tbr': tbr,
c9afb51c
AH
2083 'width': width,
2084 'height': height,
bf1317d2
S
2085 'fps': fps,
2086 'format_note': quality_label or quality,
c9afb51c 2087 }
94278f72
YCH
2088 for key, value in more_fields.items():
2089 if value:
2090 dct[key] = value
bf1317d2 2091 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2092 if type_:
2093 type_split = type_.split(';')
2094 kind_ext = type_split[0].split('/')
2095 if len(kind_ext) == 2:
94278f72
YCH
2096 kind, _ = kind_ext
2097 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2098 if kind in ('audio', 'video'):
2099 codecs = None
2100 for mobj in re.finditer(
2101 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2102 if mobj.group('key') == 'codecs':
2103 codecs = mobj.group('val')
2104 break
2105 if codecs:
6310acf5 2106 dct.update(parse_codecs(codecs))
e4a60912
S
2107 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2108 dct['downloader_options'] = {
2109 # Youtube throttles chunks >~10M
2110 'http_chunk_size': 10485760,
2111 }
aabc2be6 2112 formats.append(dct)
c5e8d7af 2113 else:
c3e54389
S
2114 manifest_url = (
2115 url_or_none(try_get(
2116 player_response,
2117 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2118 compat_str))
2119 or url_or_none(try_get(
c3e54389
S
2120 video_info, lambda x: x['hlsvp'][0], compat_str)))
2121 if manifest_url:
2122 formats = []
2123 m3u8_formats = self._extract_m3u8_formats(
2124 manifest_url, video_id, 'mp4', fatal=False)
2125 for a_format in m3u8_formats:
2126 itag = self._search_regex(
2127 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2128 if itag:
2129 a_format['format_id'] = itag
2130 if itag in self._formats:
2131 dct = self._formats[itag].copy()
2132 dct.update(a_format)
2133 a_format = dct
2134 a_format['player_url'] = player_url
2135 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2136 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2137 if self._downloader.params.get('youtube_include_hls_manifest', True):
2138 formats.append(a_format)
c3e54389 2139 else:
13577349 2140 error_message = extract_unavailable_message()
a0566bbf 2141 if not error_message:
2142 reason_list = try_get(
2143 player_response,
2144 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2145 list) or []
2146 for reason in reason_list:
2147 if not isinstance(reason, dict):
2148 continue
2149 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2150 if reason_text:
2151 if not error_message:
2152 error_message = ''
2153 error_message += reason_text
2154 if error_message:
2155 error_message = clean_html(error_message)
c3e54389 2156 if not error_message:
13577349
S
2157 error_message = clean_html(try_get(
2158 player_response, lambda x: x['playabilityStatus']['reason'],
2159 compat_str))
2160 if not error_message:
2161 error_message = clean_html(
2162 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2163 if error_message:
2164 raise ExtractorError(error_message, expected=True)
2165 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2166
7e72694b 2167 # uploader
dbdaaa23
S
2168 video_uploader = try_get(
2169 video_info, lambda x: x['author'][0],
2170 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2171 if video_uploader:
2172 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2173 else:
2174 self._downloader.report_warning('unable to extract uploader name')
2175
2176 # uploader_id
2177 video_uploader_id = None
2178 video_uploader_url = None
2179 mobj = re.search(
2180 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2181 video_webpage)
2182 if mobj is not None:
2183 video_uploader_id = mobj.group('uploader_id')
2184 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2185 else:
2186 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2187 if owner_profile_url:
2188 video_uploader_id = self._search_regex(
2189 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2190 default=None)
2191 video_uploader_url = owner_profile_url
7e72694b 2192
b45a9e69 2193 channel_id = (
3089bc74
S
2194 str_or_none(video_details.get('channelId'))
2195 or self._html_search_meta(
2196 'channelId', video_webpage, 'channel id', default=None)
2197 or self._search_regex(
b45a9e69 2198 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2199 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2200 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2201
b477fc13
S
2202 thumbnails = []
2203 thumbnails_list = try_get(
2204 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2205 for t in thumbnails_list:
2206 if not isinstance(t, dict):
2207 continue
2208 thumbnail_url = url_or_none(t.get('url'))
2209 if not thumbnail_url:
2210 continue
2211 thumbnails.append({
2212 'url': thumbnail_url,
2213 'width': int_or_none(t.get('width')),
2214 'height': int_or_none(t.get('height')),
2215 })
2216
2217 if not thumbnails:
7e72694b 2218 video_thumbnail = None
b477fc13
S
2219 # We try first to get a high quality image:
2220 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2221 video_webpage, re.DOTALL)
2222 if m_thumb is not None:
2223 video_thumbnail = m_thumb.group(1)
2224 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2225 if thumbnail_url:
2226 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2227 if video_thumbnail:
2228 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2229
2230 # upload date
2231 upload_date = self._html_search_meta(
2232 'datePublished', video_webpage, 'upload date', default=None)
2233 if not upload_date:
2234 upload_date = self._search_regex(
2235 [r'(?s)id="eow-date.*?>(.*?)</span>',
2236 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2237 video_webpage, 'upload date', default=None)
37357d21
S
2238 if not upload_date:
2239 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2240 upload_date = unified_strdate(upload_date)
2241
2242 video_license = self._html_search_regex(
2243 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2244 video_webpage, 'license', default=None)
2245
2246 m_music = re.search(
2247 r'''(?x)
2248 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2249 <ul[^>]*>\s*
2250 <li>(?P<title>.+?)
2251 by (?P<creator>.+?)
2252 (?:
2253 \(.+?\)|
2254 <a[^>]*
2255 (?:
2256 \bhref=["\']/red[^>]*>| # drop possible
2257 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2258 )
2259 .*?
2260 )?</li
2261 ''',
2262 video_webpage)
2263 if m_music:
2264 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2265 video_creator = clean_html(m_music.group('creator'))
2266 else:
2267 video_alt_title = video_creator = None
2268
2269 def extract_meta(field):
2270 return self._html_search_regex(
2271 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2272 video_webpage, field, default=None)
2273
2274 track = extract_meta('Song')
2275 artist = extract_meta('Artist')
92bc97d3 2276 album = extract_meta('Album')
822b9d9c
RA
2277
2278 # Youtube Music Auto-generated description
92bc97d3 2279 release_date = release_year = None
822b9d9c 2280 if video_description:
38d70284 2281 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c
RA
2282 if mobj:
2283 if not track:
2284 track = mobj.group('track').strip()
2285 if not artist:
2286 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2287 if not album:
2288 album = mobj.group('album'.strip())
822b9d9c
RA
2289 release_year = mobj.group('release_year')
2290 release_date = mobj.group('release_date')
2291 if release_date:
2292 release_date = release_date.replace('-', '')
2293 if not release_year:
2294 release_year = int(release_date[:4])
2295 if release_year:
2296 release_year = int(release_year)
7e72694b 2297
38d70284 2298 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2299 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2300 for content in contents:
2301 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2302 multiple_songs = False
2303 for row in rows:
2304 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2305 multiple_songs = True
2306 break
2307 for row in rows:
2308 mrr = row.get('metadataRowRenderer') or {}
2309 mrr_title = try_get(
2310 mrr, lambda x: x['title']['simpleText'], compat_str)
2311 mrr_contents = try_get(
2312 mrr, lambda x: x['contents'][0], dict) or {}
2313 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2314 if not (mrr_title and mrr_contents_text):
2315 continue
2316 if mrr_title == 'License':
2317 video_license = mrr_contents_text
2318 elif not multiple_songs:
2319 if mrr_title == 'Album':
2320 album = mrr_contents_text
2321 elif mrr_title == 'Artist':
2322 artist = mrr_contents_text
2323 elif mrr_title == 'Song':
2324 track = mrr_contents_text
9322f116 2325
7e72694b
S
2326 m_episode = re.search(
2327 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2328 video_webpage)
2329 if m_episode:
c2dd2dc0 2330 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2331 season_number = int(m_episode.group('season'))
2332 episode_number = int(m_episode.group('episode'))
2333 else:
2334 series = season_number = episode_number = None
2335
2336 m_cat_container = self._search_regex(
2337 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2338 video_webpage, 'categories', default=None)
dbeafce5 2339 category = None
7e72694b
S
2340 if m_cat_container:
2341 category = self._html_search_regex(
2342 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2343 default=None)
dbeafce5
S
2344 if not category:
2345 category = try_get(
2346 microformat, lambda x: x['category'], compat_str)
2347 video_categories = None if category is None else [category]
7e72694b
S
2348
2349 video_tags = [
2350 unescapeHTML(m.group('content'))
2351 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2352 if not video_tags:
2353 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2354
2355 def _extract_count(count_name):
2356 return str_to_int(self._search_regex(
a0566bbf 2357 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2358 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
7e72694b
S
2359 video_webpage, count_name, default=None))
2360
2361 like_count = _extract_count('like')
2362 dislike_count = _extract_count('dislike')
2363
dbdaaa23
S
2364 if view_count is None:
2365 view_count = str_to_int(self._search_regex(
2366 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2367 'view count', default=None))
2368
bf3c9326
S
2369 average_rating = (
2370 float_or_none(video_details.get('averageRating'))
2371 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2372
7e72694b 2373 # subtitles
321bf820 2374 video_subtitles = self.extract_subtitles(
2375 video_id, video_webpage, has_live_chat_replay)
29f7c58a 2376 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
7e72694b
S
2377
2378 video_duration = try_get(
2379 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2380 if not video_duration:
2381 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2382 if not video_duration:
2383 video_duration = parse_duration(self._html_search_meta(
2384 'duration', video_webpage, 'video duration'))
2385
b84071c0
JP
2386 # Get Subscriber Count of channel
2387 subscriber_count = parse_count(self._search_regex(
2388 r'"text":"([\d\.]+\w?) subscribers"',
2389 video_webpage,
2390 'subscriber count',
2391 default=None
2392 ))
2393
7e72694b
S
2394 # annotations
2395 video_annotations = None
2396 if self._downloader.params.get('writeannotations', False):
29f7c58a 2397 xsrf_token = None
2398 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2399 if ytcfg:
2400 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2401 if not xsrf_token:
2402 xsrf_token = self._search_regex(
2403 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2404 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
64b6a4e9
RA
2405 invideo_url = try_get(
2406 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2407 if xsrf_token and invideo_url:
29f7c58a 2408 xsrf_field_name = None
2409 if ytcfg:
2410 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2411 if not xsrf_field_name:
2412 xsrf_field_name = self._search_regex(
2413 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2414 video_webpage, 'xsrf field name',
2415 group='xsrf_field_name', default='session_token')
64b6a4e9
RA
2416 video_annotations = self._download_webpage(
2417 self._proto_relative_url(invideo_url),
2418 video_id, note='Downloading annotations',
2419 errnote='Unable to download video annotations', fatal=False,
2420 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2421
84213ea8 2422 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2423
dd27fd17 2424 # Look for the DASH manifest
203fb43f 2425 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2426 dash_mpd_fatal = True
8ff648e4 2427 for mpd_url in dash_mpds:
d8d24a92 2428 dash_formats = {}
774e208f 2429 try:
05d0d131
YCH
2430 def decrypt_sig(mobj):
2431 s = mobj.group(1)
2432 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2433 return '/signature/%s' % dec_s
2434
8ff648e4 2435 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2436
8ff648e4 2437 for df in self._extract_mpd_formats(
2438 mpd_url, video_id, fatal=dash_mpd_fatal,
2439 formats_dict=self._formats):
c63ca0ee
S
2440 if not df.get('filesize'):
2441 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2442 # Do not overwrite DASH format found in some previous DASH manifest
2443 if df['format_id'] not in dash_formats:
2444 dash_formats[df['format_id']] = df
77c6fb5b
S
2445 # Additional DASH manifests may end up in HTTP Error 403 therefore
2446 # allow them to fail without bug report message if we already have
2447 # some DASH manifest succeeded. This is temporary workaround to reduce
2448 # burst of bug reports until we figure out the reason and whether it
2449 # can be fixed at all.
2450 dash_mpd_fatal = False
774e208f
PH
2451 except (ExtractorError, KeyError) as e:
2452 self.report_warning(
2453 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2454 if dash_formats:
04b3b3df
JMF
2455 # Remove the formats we found through non-DASH, they
2456 # contain less info and it can be wrong, because we use
2457 # fixed values (for example the resolution). See
067aa17e 2458 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2459 # example.
d80265cc 2460 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2461 formats.extend(dash_formats.values())
d80044c2 2462
6271f1ca
PH
2463 # Check for malformed aspect ratio
2464 stretched_m = re.search(
2465 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2466 video_webpage)
2467 if stretched_m:
313dfc45
LL
2468 w = float(stretched_m.group('w'))
2469 h = float(stretched_m.group('h'))
5faf9fed
S
2470 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2471 # We will only process correct ratios.
313dfc45 2472 if w > 0 and h > 0:
41f24c32 2473 ratio = w / h
313dfc45
LL
2474 for f in formats:
2475 if f.get('vcodec') != 'none':
2476 f['stretched_ratio'] = ratio
6271f1ca 2477
026fbedc 2478 if not formats:
43ebf77d
S
2479 if 'reason' in video_info:
2480 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2481 regions_allowed = self._html_search_meta(
2482 'regionsAllowed', video_webpage, default=None)
2483 countries = regions_allowed.split(',') if regions_allowed else None
2484 self.raise_geo_restricted(
2485 msg=video_info['reason'][0], countries=countries)
2486 reason = video_info['reason'][0]
2487 if 'Invalid parameters' in reason:
2488 unavailable_message = extract_unavailable_message()
2489 if unavailable_message:
2490 reason = unavailable_message
2491 raise ExtractorError(
2492 'YouTube said: %s' % reason,
2493 expected=True, video_id=video_id)
2494 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2495 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2496
4bcc7bd1 2497 self._sort_formats(formats)
4ea3be0a 2498
21c340b8 2499 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2500
4ea3be0a 2501 return {
8bcc8756
JW
2502 'id': video_id,
2503 'uploader': video_uploader,
2504 'uploader_id': video_uploader_id,
fd050249 2505 'uploader_url': video_uploader_url,
dd4c4492
S
2506 'channel_id': channel_id,
2507 'channel_url': channel_url,
8bcc8756 2508 'upload_date': upload_date,
7caf9830 2509 'license': video_license,
936784b2 2510 'creator': video_creator or artist,
8bcc8756 2511 'title': video_title,
936784b2 2512 'alt_title': video_alt_title or track,
b477fc13 2513 'thumbnails': thumbnails,
8bcc8756
JW
2514 'description': video_description,
2515 'categories': video_categories,
000b6b5a 2516 'tags': video_tags,
8bcc8756 2517 'subtitles': video_subtitles,
360e1ca5 2518 'automatic_captions': automatic_captions,
8bcc8756
JW
2519 'duration': video_duration,
2520 'age_limit': 18 if age_gate else 0,
2521 'annotations': video_annotations,
9cafc3fd 2522 'chapters': chapters,
7e8c0af0 2523 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2524 'view_count': view_count,
4ea3be0a 2525 'like_count': like_count,
2526 'dislike_count': dislike_count,
bf3c9326 2527 'average_rating': average_rating,
8bcc8756 2528 'formats': formats,
2fe1ff85 2529 'is_live': is_live,
7c80519c 2530 'start_time': start_time,
297a564b 2531 'end_time': end_time,
12afdc2a
S
2532 'series': series,
2533 'season_number': season_number,
2534 'episode_number': episode_number,
936784b2
S
2535 'track': track,
2536 'artist': artist,
5caabd3c 2537 'album': album,
2538 'release_date': release_date,
2539 'release_year': release_year,
b84071c0 2540 'subscriber_count': subscriber_count,
4ea3be0a 2541 }
c5e8d7af 2542
5f6a1245 2543
8bdd16b4 2544class YoutubeTabIE(YoutubeBaseInfoExtractor):
2545 IE_DESC = 'YouTube.com tab'
70d5c17b 2546 _VALID_URL = r'''(?x)
2547 https?://
2548 (?:\w+\.)?
2549 (?:
2550 youtube(?:kids)?\.com|
2551 invidio\.us
2552 )/
2553 (?:
2554 (?:channel|c|user)/|
2555 (?P<not_channel>
3d3dddc9 2556 feed/|
70d5c17b 2557 (?:playlist|watch)\?.*?\blist=
2558 )|
29f7c58a 2559 (?!(?:%s)\b) # Direct URLs
70d5c17b 2560 )
2561 (?P<id>[^/?\#&]+)
2562 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2563 IE_NAME = 'youtube:tab'
2564
81127aa5 2565 _TESTS = [{
8bdd16b4 2566 # playlists, multipage
2567 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2568 'playlist_mincount': 94,
2569 'info_dict': {
2570 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2571 'title': 'Игорь Клейнер - Playlists',
2572 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2573 },
2574 }, {
2575 # playlists, multipage, different order
2576 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2577 'playlist_mincount': 94,
2578 'info_dict': {
2579 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2580 'title': 'Игорь Клейнер - Playlists',
2581 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2582 },
2583 }, {
2584 # playlists, singlepage
2585 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2586 'playlist_mincount': 4,
2587 'info_dict': {
2588 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2589 'title': 'ThirstForScience - Playlists',
2590 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2591 }
2592 }, {
2593 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2594 'only_matching': True,
2595 }, {
2596 # basic, single video playlist
0e30a7b9 2597 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2598 'info_dict': {
0e30a7b9 2599 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2600 'uploader': 'Sergey M.',
2601 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2602 'title': 'youtube-dl public playlist',
81127aa5 2603 },
0e30a7b9 2604 'playlist_count': 1,
9291475f 2605 }, {
8bdd16b4 2606 # empty playlist
0e30a7b9 2607 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2608 'info_dict': {
0e30a7b9 2609 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2610 'uploader': 'Sergey M.',
2611 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2612 'title': 'youtube-dl empty playlist',
9291475f
PH
2613 },
2614 'playlist_count': 0,
2615 }, {
8bdd16b4 2616 # Home tab
2617 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2618 'info_dict': {
8bdd16b4 2619 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2620 'title': 'lex will - Home',
2621 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2622 },
8bdd16b4 2623 'playlist_mincount': 2,
9291475f 2624 }, {
8bdd16b4 2625 # Videos tab
2626 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2627 'info_dict': {
8bdd16b4 2628 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2629 'title': 'lex will - Videos',
2630 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2631 },
8bdd16b4 2632 'playlist_mincount': 975,
9291475f 2633 }, {
8bdd16b4 2634 # Videos tab, sorted by popular
2635 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2636 'info_dict': {
8bdd16b4 2637 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2638 'title': 'lex will - Videos',
2639 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2640 },
8bdd16b4 2641 'playlist_mincount': 199,
9291475f 2642 }, {
8bdd16b4 2643 # Playlists tab
2644 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2645 'info_dict': {
8bdd16b4 2646 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2647 'title': 'lex will - Playlists',
2648 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2649 },
8bdd16b4 2650 'playlist_mincount': 17,
ac7553d0 2651 }, {
8bdd16b4 2652 # Community tab
2653 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2654 'info_dict': {
8bdd16b4 2655 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2656 'title': 'lex will - Community',
2657 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2658 },
2659 'playlist_mincount': 18,
87dadd45 2660 }, {
8bdd16b4 2661 # Channels tab
2662 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2663 'info_dict': {
8bdd16b4 2664 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2665 'title': 'lex will - Channels',
2666 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2667 },
2668 'playlist_mincount': 138,
6b08cdf6 2669 }, {
a0566bbf 2670 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2671 'only_matching': True,
2672 }, {
a0566bbf 2673 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2674 'only_matching': True,
2675 }, {
a0566bbf 2676 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2677 'only_matching': True,
2678 }, {
2679 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2680 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2681 'info_dict': {
2682 'title': '29C3: Not my department',
2683 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2684 'uploader': 'Christiaan008',
2685 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2686 },
2687 'playlist_count': 96,
2688 }, {
2689 'note': 'Large playlist',
2690 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2691 'info_dict': {
8bdd16b4 2692 'title': 'Uploads from Cauchemar',
2693 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2694 'uploader': 'Cauchemar',
2695 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2696 },
8bdd16b4 2697 'playlist_mincount': 1123,
2698 }, {
2699 # even larger playlist, 8832 videos
2700 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2701 'only_matching': True,
4b7df0d3
JMF
2702 }, {
2703 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2704 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2705 'info_dict': {
acf757f4
PH
2706 'title': 'Uploads from Interstellar Movie',
2707 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2708 'uploader': 'Interstellar Movie',
8bdd16b4 2709 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2710 },
481cc733 2711 'playlist_mincount': 21,
8bdd16b4 2712 }, {
2713 # https://github.com/ytdl-org/youtube-dl/issues/21844
2714 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2715 'info_dict': {
2716 'title': 'Data Analysis with Dr Mike Pound',
2717 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2718 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2719 'uploader': 'Computerphile',
2720 },
2721 'playlist_mincount': 11,
2722 }, {
a0566bbf 2723 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2724 'only_matching': True,
dacb3a86
S
2725 }, {
2726 # Playlist URL that does not actually serve a playlist
2727 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2728 'info_dict': {
2729 'id': 'FqZTN594JQw',
2730 'ext': 'webm',
2731 'title': "Smiley's People 01 detective, Adventure Series, Action",
2732 'uploader': 'STREEM',
2733 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2734 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2735 'upload_date': '20150526',
2736 'license': 'Standard YouTube License',
2737 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2738 'categories': ['People & Blogs'],
2739 'tags': list,
dbdaaa23 2740 'view_count': int,
dacb3a86
S
2741 'like_count': int,
2742 'dislike_count': int,
2743 },
2744 'params': {
2745 'skip_download': True,
2746 },
13a75688 2747 'skip': 'This video is not available.',
dacb3a86 2748 'add_ie': [YoutubeIE.ie_key()],
481cc733 2749 }, {
8bdd16b4 2750 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2751 'only_matching': True,
66b48727 2752 }, {
8bdd16b4 2753 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2754 'only_matching': True,
a0566bbf 2755 }, {
2756 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2757 'info_dict': {
2758 'id': '9Auq9mYxFEE',
2759 'ext': 'mp4',
2760 'title': 'Watch Sky News live',
2761 'uploader': 'Sky News',
2762 'uploader_id': 'skynews',
2763 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2764 'upload_date': '20191102',
2765 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2766 'categories': ['News & Politics'],
2767 'tags': list,
2768 'like_count': int,
2769 'dislike_count': int,
2770 },
2771 'params': {
2772 'skip_download': True,
2773 },
2774 }, {
2775 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2776 'info_dict': {
2777 'id': 'a48o2S1cPoo',
2778 'ext': 'mp4',
2779 'title': 'The Young Turks - Live Main Show',
2780 'uploader': 'The Young Turks',
2781 'uploader_id': 'TheYoungTurks',
2782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2783 'upload_date': '20150715',
2784 'license': 'Standard YouTube License',
2785 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2786 'categories': ['News & Politics'],
2787 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2788 'like_count': int,
2789 'dislike_count': int,
2790 },
2791 'params': {
2792 'skip_download': True,
2793 },
2794 'only_matching': True,
2795 }, {
2796 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2797 'only_matching': True,
2798 }, {
2799 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2800 'only_matching': True,
3d3dddc9 2801 }, {
2802 'url': 'https://www.youtube.com/feed/trending',
2803 'only_matching': True,
2804 }, {
2805 # needs auth
2806 'url': 'https://www.youtube.com/feed/library',
2807 'only_matching': True,
2808 }, {
2809 # needs auth
2810 'url': 'https://www.youtube.com/feed/history',
2811 'only_matching': True,
2812 }, {
2813 # needs auth
2814 'url': 'https://www.youtube.com/feed/subscriptions',
2815 'only_matching': True,
2816 }, {
2817 # needs auth
2818 'url': 'https://www.youtube.com/feed/watch_later',
2819 'only_matching': True,
2820 }, {
2821 # no longer available?
2822 'url': 'https://www.youtube.com/feed/recommended',
2823 'only_matching': True,
29f7c58a 2824 }, {
2825 # inline playlist with not always working continuations
2826 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2827 'only_matching': True,
2828 }, {
2829 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2830 'only_matching': True,
2831 }, {
2832 'url': 'https://www.youtube.com/course',
2833 'only_matching': True,
2834 }, {
2835 'url': 'https://www.youtube.com/zsecurity',
2836 'only_matching': True,
2837 }, {
2838 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2839 'only_matching': True,
2840 }, {
2841 'url': 'https://www.youtube.com/TheYoungTurks/live',
2842 'only_matching': True,
2843 }]
2844
2845 @classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is declined here; for every other URL the
    decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2849
2850 def _extract_channel_id(self, webpage):
2851 channel_id = self._html_search_meta(
2852 'channelId', webpage, 'channel id', default=None)
2853 if channel_id:
2854 return channel_id
2855 channel_url = self._html_search_meta(
2856 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2857 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2858 'twitter:app:url:googleplay'), webpage, 'channel url')
2859 return self._search_regex(
2860 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2861 channel_url, 'channel id')
15f6397c 2862
8bdd16b4 2863 @staticmethod
2864 def _extract_grid_item_renderer(item):
2865 for item_kind in ('Playlist', 'Video', 'Channel'):
2866 renderer = item.get('grid%sRenderer' % item_kind)
2867 if renderer:
2868 return renderer
2869
def _extract_video(self, renderer):
    """Build a ``url_transparent`` result dict from a video renderer.

    The video ID is used both as ``id`` and as ``url``; processing is handed
    over to YoutubeIE through ``ie_key``.  Title, description, duration,
    view count and uploader are read from the renderer when available.
    """
    def text_of(getter):
        # Only string values are accepted; anything else yields None.
        return try_get(renderer, getter, compat_str)

    vid = renderer.get('videoId')
    # All whitespace is removed before the leading run of digits and commas
    # is matched.
    compact_views = re.sub(
        r'\s', '', text_of(lambda x: x['viewCountText']['simpleText']) or '')
    return {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': vid,
        'url': vid,
        'title': text_of((
            lambda x: x['title']['runs'][0]['text'],
            lambda x: x['title']['simpleText'])),
        'description': text_of(
            lambda x: x['descriptionSnippet']['runs'][0]['text']),
        'duration': parse_duration(
            text_of(lambda x: x['lengthText']['simpleText'])),
        'view_count': str_to_int(self._search_regex(
            r'^([\d,]+)', compact_views, 'view count', default=None)),
        'uploader': text_of(lambda x: x['ownerText']['runs'][0]['text']),
    }
652cdaa2 2899
def _grid_entries(self, grid_renderer):
    """Yield results for the items of a grid renderer.

    Each item may describe a playlist, a video and/or a channel.  The three
    checks are independent, so one item can produce more than one result.
    Items that are not dicts, or that carry no recognised grid renderer,
    are skipped.
    """
    for item in grid_renderer['items']:
        renderer = (
            self._extract_grid_item_renderer(item)
            if isinstance(item, dict) else None)
        if not isinstance(renderer, dict):
            continue
        runs_title = try_get(
            renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=runs_title)
        # video
        if renderer.get('videoId'):
            yield self._extract_video(renderer)
        # channel (its title is read from `simpleText`, not from `runs`)
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(),
                video_title=try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str))
2928
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield entries found directly in a shelf's ``content``.

    Only a ``gridRenderer`` is processed; a ``horizontalListRenderer`` is
    recognised but produces nothing yet.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid = content.get('gridRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is
            # processed as separate playlist
            # TODO: this includes only first N items
            for entry in self._grid_entries(grid):
                yield entry
        if content.get('horizontalListRenderer'):
            # TODO
            pass
8bdd16b4 2944
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf renderer.

    When the shelf has an endpoint URL, a result pointing to that URL is
    yielded first.  Entries extracted from the shelf content are yielded
    afterwards in every case, unless the shelf was skipped.

    With *skip_channels* set, a shelf whose URL contains ``/channels?`` is
    dropped entirely and nothing is yielded for it.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    # Links to other channels are detected through the URL itself; checking
    # endpoint.commandMetadata.webCommandMetadata.webPageType against
    # WEB_PAGE_TYPE_CHANNEL will not work.
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str))
    # The shelf may carry no URL at all, so its content is walked as well.
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2962
def _playlist_entries(self, video_list_renderer):
    """Yield a video result for each usable item in ``contents``.

    An item is used when it is a dict holding a ``playlistVideoRenderer``
    (or, failing that, a ``playlistPanelVideoRenderer``) that is itself a
    dict with a non-empty ``videoId``.
    """
    for content in video_list_renderer['contents']:
        renderer = None
        if isinstance(content, dict):
            renderer = content.get('playlistVideoRenderer')
            if not renderer:
                renderer = content.get('playlistPanelVideoRenderer')
        if isinstance(renderer, dict) and renderer.get('videoId'):
            yield self._extract_video(renderer)
07aeced6 2974
3d3dddc9 2975 r""" # Not needed in the new implementation
3462ffa8 2976 def _itemSection_entries(self, item_sect_renderer):
2977 for content in item_sect_renderer['contents']:
2978 if not isinstance(content, dict):
2979 continue
2980 renderer = content.get('videoRenderer', {})
2981 if not isinstance(renderer, dict):
2982 continue
2983 video_id = renderer.get('videoId')
2984 if not video_id:
2985 continue
2986 yield self._extract_video(renderer)
3d3dddc9 2987 """
3462ffa8 2988
2989 def _rich_entries(self, rich_grid_renderer):
2990 renderer = try_get(
70d5c17b 2991 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 2992 video_id = renderer.get('videoId')
2993 if not video_id:
2994 return
2995 yield self._extract_video(renderer)
2996
8bdd16b4 2997 def _video_entry(self, video_renderer):
2998 video_id = video_renderer.get('videoId')
2999 if video_id:
3000 return self._extract_video(video_renderer)
dacb3a86 3001
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) is yielded first, followed by every inline
    link in the post text that YoutubeIE can handle.  An inline link that
    points to the attached video is not yielded a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        entry = self._video_entry(video_renderer)
        if entry:
            # Remember the attachment's ID so that the duplicate check on
            # inline links below has something to compare against.
            video_id = video_renderer.get('videoId')
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the ID of the linked video, not that of the attachment.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3030
8bdd16b4 3031 def _post_thread_continuation_entries(self, post_thread_continuation):
3032 contents = post_thread_continuation.get('contents')
3033 if not isinstance(contents, list):
3034 return
3035 for content in contents:
3036 renderer = content.get('backstagePostThreadRenderer')
3037 if not isinstance(renderer, dict):
3038 continue
3039 for entry in self._post_thread_entries(renderer):
3040 yield entry
07aeced6 3041
29f7c58a 3042 @staticmethod
3043 def _build_continuation_query(continuation, ctp=None):
3044 query = {
3045 'ctoken': continuation,
3046 'continuation': continuation,
3047 }
3048 if ctp:
3049 query['itct'] = ctp
3050 return query
3051
8bdd16b4 3052 @staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    The data is looked up at ``continuations[0].nextContinuationData``.
    ``None`` is returned when it is missing or holds no ``continuation``
    token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3063
8bdd16b4 3064 @classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or ``None``.

    ``nextContinuationData`` is preferred.  Otherwise the ``contents`` list
    is scanned for the first ``continuationItemRenderer`` whose endpoint
    carries a ``continuationCommand`` token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    contents = renderer.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        if not isinstance(content, dict):
            continue
        endpoint = try_get(
            content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'],
            compat_str) if endpoint else None
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3086
def _entries(self, tab, identity_token):
    """Yield every entry of a tab, following continuation pages.

    ``tab`` is the selected tab renderer dict; its 'content' must hold a
    'sectionListRenderer' or a 'richGridRenderer'. ``identity_token``, when
    truthy, is sent as the 'x-youtube-identity-token' header on each
    continuation request.

    Continuation pages are fetched from the ``browse_ajax`` endpoint until
    no further continuation is found. HTTP 500/503 responses are retried up
    to three times before the error is re-raised.
    """

    def extract_entries(parent_renderer):
        # Needs to be called again for each continuation page so that
        # continuation works with feeds. The next continuation (if any) is
        # stored in continuation_list[0] as a side effect.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    is_channels_tab = tab.get('title') == 'Channels'
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            # Fall back to the section itself, then to the parent below
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell: Python 2 does not support
    # nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict)
        if continuation_contents:
            continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
            if continuation_renderer:
                for entry in self._playlist_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('gridContinuation')
            if continuation_renderer:
                for entry in self._grid_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('itemSectionContinuation')
            if continuation_renderer:
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
            if continuation_renderer:
                continuation_list = [None]
                for entry in extract_entries(continuation_renderer):
                    yield entry
                continuation = continuation_list[0]
                continue

        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        if continuation_items:
            continuation_item = continuation_items[0]
            if not isinstance(continuation_item, dict):
                # Stop paginating: retrying with the unchanged continuation
                # would request the same page forever
                break
            renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
            if renderer:
                video_list_renderer = {'contents': continuation_items}
                for entry in self._playlist_entries(video_list_renderer):
                    yield entry
                continuation = self._extract_continuation(video_list_renderer)
                continue
        break
9558dcec 3230
@staticmethod
def _extract_selected_tab(tabs):
    """Return the 'tabRenderer' dict of the first tab flagged as selected.

    Raises ExtractorError when no tab has a boolean true
    ``tabRenderer.selected``.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 3238
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of ``data``.

    Returns a dict with 'uploader', 'uploader_id' and 'uploader_url' taken
    from the video owner of a 'playlistSidebarSecondaryInfoRenderer', or an
    empty dict when no owner is found.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return info
3261
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build a playlist result from the selected tab of a tabbed page.

    Title, description and id come from the channel metadata when present
    and are overridden by the playlist metadata when present. A missing id
    falls back to ``item_id`` and a missing title to a name derived from
    the id. Uploader fields from the sidebar are merged into the result.
    """
    selected_tab = self._extract_selected_tab(tabs)
    playlist_id = title = description = None

    channel_meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_meta:
        title = channel_meta.get('title') or item_id
        tab_title = selected_tab.get('title')
        if tab_title:
            title = title + ' - %s' % tab_title
        description = channel_meta.get('description')
        playlist_id = channel_meta.get('externalId')

    playlist_meta = try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if playlist_meta:
        title, description, playlist_id = playlist_meta.get('title'), None, item_id

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = "Youtube " + playlist_id.title()

    result = self.playlist_result(
        self._entries(selected_tab, identity_token),
        playlist_id=playlist_id, playlist_title=title,
        playlist_description=description)
    result.update(self._extract_uploader(data))
    return result
73c4ac2c 3291
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for a playlist rendered inline on a watch page.

    When the playlist advertises its own URL and it differs from ``url``,
    extraction is delegated to that URL; otherwise the inline playlist
    entries are returned directly.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3309
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs from ``data['alerts']``.

    Each element of the alerts list is expected to be a dict mapping a
    renderer name to an alert dict. For every alert that has a 'type', the
    'simpleText' message (if any) is yielded first, followed by the text of
    each run. Non-dict entries at either level are skipped instead of
    raising.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard against malformed payloads: .get() below needs a dict
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
3327
def _extract_identity_token(self, webpage, item_id):
    """Return the identity token found in ``webpage``, or None.

    The 'ID_TOKEN' value of the extracted ytcfg is preferred; otherwise the
    raw page is searched with a regex.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3337
def _real_extract(self, url):
    """Extract a tab, playlist or single video from a YouTube page URL.

    The URL host is normalised to www.youtube.com. Bare channel/user pages
    are redirected to their '/videos' tab, and 'watch' URLs without a video
    id are redirected to the playlist page when a list id is given. The
    page is then dispatched on its initial data: tabbed page, inline
    playlist, or single video. Raises ExtractorError when none matches.
    """
    item_id = self._match_id(url)
    parsed = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed._replace(netloc='www.youtube.com'))
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    not_channel = is_home.group('not_channel') if is_home is not None else None
    if is_home is not None and not_channel is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not_channel is not None and not_channel.startswith('watch') and not video_id:
        if not playlist_id:
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning(
            '%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video %s'
            % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    for alert_type, alert_message in self._extract_alerts(data):
        self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3389
c5e8d7af 3390
class YoutubePlaylistIE(InfoExtractor):
    """Resolve bare playlist ids and ``list=`` URLs to the playlist page.

    Extraction itself is delegated to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # YoutubeTabIE takes precedence for every URL it can handle
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string; synthesise one from the id
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3465
3466
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be short links carrying a ``list=`` parameter.

    The link is turned into the equivalent youtube.com watch URL and
    handed to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3505
3506
class YoutubeYtUserIE(InfoExtractor):
    """Map the ``ytuser:<name>`` shorthand to the user's channel URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3520
b05654f0 3521
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` shorthand to the liked-videos playlist (list=LL)."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {'url': ':ytfav', 'only_matching': True},
        {'url': ':ytfavorites', 'only_matching': True},
    ]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3539
3540
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backed by the ``youtubei/v1/search`` JSON endpoint."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the 'params' field of the search request
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` url_transparent video entries for ``query``.

        Result pages are requested one after another; the continuation
        token found on each page is sent with the next request. Iteration
        stops when a page fails to download, contains no video results, or
        carries no continuation token.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for isr in slr_contents:
                if not isr_contents:
                    # try_get returns None for sections without an
                    # itemSectionRenderer; fall back to an empty list so the
                    # scan below cannot fail on None
                    candidates = try_get(
                        isr, lambda x: x['itemSectionRenderer']['contents'],
                        list) or []
                    # Only keep a section that holds at least one video
                    if any(isinstance(content, dict)
                           and content.get('videoRenderer') is not None
                           for content in candidates):
                        isr_contents = candidates

                if continuation_token is None:
                    continuation_token = try_get(
                        isr,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
                            'token'],
                        compat_str)
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Accept thousands separators ("1,234 views"); matching
                # digits only would truncate the count at the first comma
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3648
c9ae7b95 3649
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE using the 'ytsearchdate' search key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3655
c9ae7b95 3656
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results search URLs.

    The query ('search_query' or 'q') and the optional 'sp' filter are read
    from the URL and the search is run through the inherited search code.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the explicit URL pattern as-is
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The 'sp' URL parameter is forwarded as the request's search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3682
3683
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the feed name supplied by the subclass
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # Feeds are regular tab pages under /feed/<name>
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3704
3705
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shorthand to the watch-later playlist (list=WL)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3718
3719
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed.

    Matches the bare youtube.com home URL and the ``:ytrec`` shorthand.
    """
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 3734
1ed5b5c9 3735
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed (``:ytsubs`` shorthand)."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 3747
1ed5b5c9 3748
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed (``:ythistory`` shorthand)."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
3757
3758
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id.

    Such URLs typically result from an unquoted '&' on the command line;
    extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
         'only_matching': True},
        {'url': 'https://www.youtube.com/watch?',
         'only_matching': True},
        {'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
         'only_matching': True},
        {'url': 'https://www.youtube.com/watch?feature=foo',
         'only_matching': True},
        {'url': 'https://www.youtube.com/watch?hl=en-GB',
         'only_matching': True},
        {'url': 'https://www.youtube.com/watch?t=2372',
         'only_matching': True},
    ]

    def _real_extract(self, url):
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(message, expected=True)
772fd5cc
PH
3806
3807
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters.

    Extraction always fails with an expected error naming the partial id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
8bdd16b4 3823
3824
# Do YouTube show URLs even exist anymore? I couldn't find any.
3826r'''
3827class YoutubeShowIE(YoutubeTabIE):
8bdd16b4 3828 IE_DESC = 'YouTube.com (multi-season) shows'
3829 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3830 IE_NAME = 'youtube:show'
3831 _TESTS = [{
3832 'url': 'https://www.youtube.com/show/airdisasters',
3833 'playlist_mincount': 5,
3834 'info_dict': {
3835 'id': 'airdisasters',
3836 'title': 'Air Disasters',
3837 }
3838 }]
3839
3840 def _real_extract(self, url):
3841 playlist_id = self._match_id(url)
3842 return super(YoutubeShowIE, self)._real_extract(
3843 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3462ffa8 3844'''