]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
Strip out internal fields such as `_filename` from infojson (Closes #42)
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
2d30521a 34 float_or_none,
4bb4a188 35 get_element_by_id,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
b84071c0 39 parse_count,
7c80519c 40 parse_duration,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
81c2f20b 51 uppercase_escape,
21c340b8 52 url_or_none,
6e6bc8da 53 urlencode_postdata,
8bdd16b4 54 urljoin,
c5e8d7af
PH
55)
56
5f6a1245 57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names (fragment, meant to be embedded in a
    # larger pattern).
    # NOTE(review): presumably first path components that must not be
    # mistaken for a channel/user name - confirm against the users of this
    # constant, which are outside this part of the file.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment matching a playlist ID: either one of the known prefixes
    # followed by at least 10 ID characters, or one of the fixed short IDs.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie (containing hl=en) on .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video ID in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form fields plus the JSON-encoded f_req to url.

            The request is non-fatal; callers compare the result with False
            to detect a failed request.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so that only the
                # JSON array is handed to the parser
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same URL is embedded in both the lookup and challenge requests
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than
            # 'if ... else', so without the parentheses any other message
            # would be reported without the 'Unable to login' prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as the
                    # login failure message above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is only treated as successful when the CheckCookie
        # response mentions the account page URL
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with a private copy of 'query'.

        A missing or None 'query' is replaced by an empty dict, so the
        parent always receives a dict that is not shared with the caller.
        """
        # 'or {}' also covers an explicit query=None, which would otherwise
        # fail on None.copy()
        query = (kwargs.get('query') or {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        self._set_language()
        # The result is deliberately ignored: initialization finishes the
        # same way whether or not the login succeeded.
        self._login()

    # Default request body for _call_api
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Patterns capturing the JSON object assigned to ytInitialData /
    # ytInitialPlayerResponse in a page, and what may follow that object
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """Call the youtubei/v1 endpoint ep and return the downloaded JSON.

        The request data is _DEFAULT_API_DATA with the top-level keys of
        query merged over it (shallow merge: a key present in query replaces
        the default value for that key entirely), serialized as JSON.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed.

        The pattern that requires a known boundary after the object is tried
        first; the bare pattern is the fallback.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the object passed to ytcfg.set(...) in webpage.

        When no such call is found the literal '{}' is parsed instead;
        parsing is non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result for YoutubeIE from a renderer dict.

        Reads videoId, title, descriptionSnippet, lengthText, viewCountText
        and ownerText from renderer; the video ID is used as both 'id' and
        'url' of the result.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is removed first, then the leading run of digits and
        # commas is taken as the count
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
355
0c148415 356
360e1ca5 357class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 358 IE_DESC = 'YouTube.com'
cb7dfeea 359 _VALID_URL = r"""(?x)^
c5e8d7af 360 (
edb53e2d 361 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 362 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 363 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 364 (?:www\.)?pwnyoutube\.com/|
8b561bfc 365 (?:www\.)?hooktube\.com/|
f7000f3a 366 (?:www\.)?yourepeat\.com/|
e69ae5b9 367 tube\.majestyc\.net/|
ba036333 368 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 369 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 370 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 371 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 372 (?:www\.)?invidious\.kabi\.tk/|
ba036333 373 (?:www\.)?invidious\.13ad\.de/|
791d2e81 374 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 375 (?:www\.)?invidious\.zapashcanon\.fr/|
376 (?:www\.)?invidious\.kavin\.rocks/|
377 (?:www\.)?invidious\.tube/|
378 (?:www\.)?invidiou\.site/|
379 (?:www\.)?invidious\.site/|
380 (?:www\.)?invidious\.xyz/|
494d664e 381 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 382 (?:www\.)?invidious\.drycat\.fr/|
ba036333 383 (?:www\.)?tube\.poal\.co/|
29f7c58a 384 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 385 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 386 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 387 (?:www\.)?yewtu\.be/|
494d664e 388 (?:www\.)?yt\.elukerio\.org/|
894b3826 389 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 390 (?:www\.)?invidious\.ggc-project\.de/|
391 (?:www\.)?yt\.maisputain\.ovh/|
392 (?:www\.)?invidious\.13ad\.de/|
393 (?:www\.)?invidious\.toot\.koeln/|
394 (?:www\.)?invidious\.fdn\.fr/|
395 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 396 (?:www\.)?kgg2m7yk5aybusll\.onion/|
397 (?:www\.)?qklhadlycap4cnod\.onion/|
398 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
399 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
400 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
401 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 402 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 403 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 404 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
405 (?:.*?\#/)? # handle anchor (#/) redirect urls
406 (?: # the various things that can precede the ID:
ac7553d0 407 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 408 |(?: # or the v= param in all its forms
f7000f3a 409 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 410 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 411 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
412 v=
413 )
f4b05232 414 ))
cbaed4bb
S
415 |(?:
416 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
417 vid\.plus| # or vid.plus/xxxx
418 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 419 )/
edb53e2d 420 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 421 )
c5e8d7af 422 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 423 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
424 (?!.*?\blist=
425 (?:
426 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
427 WL # WL are handled by the watch later IE
428 )
429 )
c5e8d7af 430 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 431 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 432 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
433 _PLAYER_INFO_RE = (
434 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
435 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
436 )
2c62dc26 437 _formats = {
c2d3cb4c 438 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
439 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
440 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
442 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
443 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
444 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
445 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 446 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 447 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
448 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
449 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
450 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
451 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
452 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 453 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 454 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 456
457
458 # 3D videos
c2d3cb4c 459 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
460 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
461 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
462 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 463 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
464 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
465 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 466
96fb5605 467 # Apple HTTP Live Streaming
11f12195 468 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 469 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
470 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
471 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
472 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
473 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 474 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
475 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
476
477 # DASH mp4 video
d23028a8
S
478 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 483 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
484 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
488 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
489 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 490
f6f1fc92 491 # Dash mp4 audio
d23028a8
S
492 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
493 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
494 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
495 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
496 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
497 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
498 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
499
500 # Dash webm
d23028a8
S
501 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
505 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
508 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 516 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
517 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
519 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
520 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
521 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
523
524 # Dash webm audio
d23028a8
S
525 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
526 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 527
0857baad 528 # Dash webm audio with opus inside
d23028a8
S
529 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
530 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
531 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 532
ce6b9a2d
PH
533 # RTMP (unnamed)
534 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
535
536 # av01 video only formats sometimes served with "unknown" codecs
537 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
538 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
539 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
540 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 541 }
29f7c58a 542 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 543
fd5c4aab
S
544 _GEO_BYPASS = False
545
78caa52a 546 IE_NAME = 'youtube'
2eb88d95
PH
547 _TESTS = [
548 {
2d3d2997 549 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
550 'info_dict': {
551 'id': 'BaW_jenozKc',
552 'ext': 'mp4',
3867038a 553 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
554 'uploader': 'Philipp Hagemeister',
555 'uploader_id': 'phihag',
ec85ded8 556 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
557 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
558 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 559 'upload_date': '20121002',
3867038a 560 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 561 'categories': ['Science & Technology'],
3867038a 562 'tags': ['youtube-dl'],
556dbe7f 563 'duration': 10,
dbdaaa23 564 'view_count': int,
3e7c1224
PH
565 'like_count': int,
566 'dislike_count': int,
7c80519c 567 'start_time': 1,
297a564b 568 'end_time': 9,
2eb88d95 569 }
0e853ca4 570 },
fccd3771 571 {
4bc3a23e
PH
572 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
573 'note': 'Embed-only video (#1746)',
574 'info_dict': {
575 'id': 'yZIXLfi8CZQ',
576 'ext': 'mp4',
577 'upload_date': '20120608',
578 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
579 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
580 'uploader': 'SET India',
94bfcd23 581 'uploader_id': 'setindia',
ec85ded8 582 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 583 'age_limit': 18,
fccd3771
PH
584 }
585 },
11b56058 586 {
8bdd16b4 587 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
588 'note': 'Use the first video ID in the URL',
589 'info_dict': {
590 'id': 'BaW_jenozKc',
591 'ext': 'mp4',
3867038a 592 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
593 'uploader': 'Philipp Hagemeister',
594 'uploader_id': 'phihag',
ec85ded8 595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 596 'upload_date': '20121002',
3867038a 597 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 598 'categories': ['Science & Technology'],
3867038a 599 'tags': ['youtube-dl'],
556dbe7f 600 'duration': 10,
dbdaaa23 601 'view_count': int,
11b56058
PM
602 'like_count': int,
603 'dislike_count': int,
34a7de29
S
604 },
605 'params': {
606 'skip_download': True,
607 },
11b56058 608 },
dd27fd17 609 {
2d3d2997 610 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
611 'note': '256k DASH audio (format 141) via DASH manifest',
612 'info_dict': {
613 'id': 'a9LDPn-MO4I',
614 'ext': 'm4a',
615 'upload_date': '20121002',
616 'uploader_id': '8KVIDEO',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
618 'description': '',
619 'uploader': '8KVIDEO',
620 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 621 },
4bc3a23e
PH
622 'params': {
623 'youtube_include_dash_manifest': True,
624 'format': '141',
4919603f 625 },
de3c7fe0 626 'skip': 'format 141 not served anymore',
dd27fd17 627 },
8bdd16b4 628 # DASH manifest with encrypted signature
629 {
630 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
631 'info_dict': {
632 'id': 'IB3lcPjvWLA',
633 'ext': 'm4a',
634 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
635 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
636 'duration': 244,
637 'uploader': 'AfrojackVEVO',
638 'uploader_id': 'AfrojackVEVO',
639 'upload_date': '20131011',
640 },
641 'params': {
642 'youtube_include_dash_manifest': True,
643 'format': '141/bestaudio[ext=m4a]',
644 },
645 },
aa79ac0c
PH
646 # Controversy video
647 {
648 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
649 'info_dict': {
650 'id': 'T4XJQO3qol8',
651 'ext': 'mp4',
556dbe7f 652 'duration': 219,
aa79ac0c 653 'upload_date': '20100909',
4fe54c12 654 'uploader': 'Amazing Atheist',
aa79ac0c 655 'uploader_id': 'TheAmazingAtheist',
ec85ded8 656 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
657 'title': 'Burning Everyone\'s Koran',
658 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
659 }
c522adb1 660 },
dd2d55f1 661 # Normal age-gate video (embed allowed)
c522adb1 662 {
2d3d2997 663 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
664 'info_dict': {
665 'id': 'HtVdAasjOgU',
666 'ext': 'mp4',
667 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 668 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 669 'duration': 142,
c522adb1
JMF
670 'uploader': 'The Witcher',
671 'uploader_id': 'WitcherGame',
ec85ded8 672 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 673 'upload_date': '20140605',
34952f09 674 'age_limit': 18,
c522adb1
JMF
675 },
676 },
8bdd16b4 677 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
678 # YouTube Red ad is not captured for creator
679 {
680 'url': '__2ABJjxzNo',
681 'info_dict': {
682 'id': '__2ABJjxzNo',
683 'ext': 'mp4',
684 'duration': 266,
685 'upload_date': '20100430',
686 'uploader_id': 'deadmau5',
687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
688 'creator': 'Dada Life, deadmau5',
689 'description': 'md5:12c56784b8032162bb936a5f76d55360',
690 'uploader': 'deadmau5',
691 'title': 'Deadmau5 - Some Chords (HD)',
692 'alt_title': 'This Machine Kills Some Chords',
693 },
694 'expected_warnings': [
695 'DASH manifest missing',
696 ]
697 },
067aa17e 698 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
699 {
700 'url': 'lqQg6PlCWgI',
701 'info_dict': {
702 'id': 'lqQg6PlCWgI',
703 'ext': 'mp4',
556dbe7f 704 'duration': 6085,
90227264 705 'upload_date': '20150827',
cbe2bd91 706 'uploader_id': 'olympic',
ec85ded8 707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 708 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 709 'uploader': 'Olympic',
cbe2bd91
PH
710 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
711 },
712 'params': {
713 'skip_download': 'requires avconv',
e52a40ab 714 }
cbe2bd91 715 },
6271f1ca
PH
716 # Non-square pixels
717 {
718 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
719 'info_dict': {
720 'id': '_b-2C3KPAM0',
721 'ext': 'mp4',
722 'stretched_ratio': 16 / 9.,
556dbe7f 723 'duration': 85,
6271f1ca
PH
724 'upload_date': '20110310',
725 'uploader_id': 'AllenMeow',
ec85ded8 726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 727 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 728 'uploader': '孫ᄋᄅ',
6271f1ca
PH
729 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
730 },
06b491eb
S
731 },
732 # url_encoded_fmt_stream_map is empty string
733 {
734 'url': 'qEJwOuvDf7I',
735 'info_dict': {
736 'id': 'qEJwOuvDf7I',
f57b7835 737 'ext': 'webm',
06b491eb
S
738 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
739 'description': '',
740 'upload_date': '20150404',
741 'uploader_id': 'spbelect',
742 'uploader': 'Наблюдатели Петербурга',
743 },
744 'params': {
745 'skip_download': 'requires avconv',
e323cf3f
S
746 },
747 'skip': 'This live event has ended.',
06b491eb 748 },
067aa17e 749 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
750 {
751 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
752 'info_dict': {
753 'id': 'FIl7x6_3R5Y',
eb6793ba 754 'ext': 'webm',
da77d856
S
755 'title': 'md5:7b81415841e02ecd4313668cde88737a',
756 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 757 'duration': 220,
da77d856
S
758 'upload_date': '20150625',
759 'uploader_id': 'dorappi2000',
ec85ded8 760 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 761 'uploader': 'dorappi2000',
eb6793ba 762 'formats': 'mincount:31',
da77d856 763 },
eb6793ba 764 'skip': 'not actual anymore',
2ee8f5d8 765 },
8a1a26ce
YCH
766 # DASH manifest with segment_list
767 {
768 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
769 'md5': '8ce563a1d667b599d21064e982ab9e31',
770 'info_dict': {
771 'id': 'CsmdDsKjzN8',
772 'ext': 'mp4',
17ee98e1 773 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
774 'uploader': 'Airtek',
775 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
776 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
777 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
778 },
779 'params': {
780 'youtube_include_dash_manifest': True,
781 'format': '135', # bestvideo
be49068d
S
782 },
783 'skip': 'This live event has ended.',
2ee8f5d8 784 },
cf7e015f
S
785 {
786 # Multifeed videos (multiple cameras), URL is for Main Camera
787 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
788 'info_dict': {
789 'id': 'jqWvoWXjCVs',
790 'title': 'teamPGP: Rocket League Noob Stream',
791 'description': 'md5:dc7872fb300e143831327f1bae3af010',
792 },
793 'playlist': [{
794 'info_dict': {
795 'id': 'jqWvoWXjCVs',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 799 'duration': 7335,
cf7e015f
S
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
ec85ded8 803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 804 'license': 'Standard YouTube License',
cf7e015f
S
805 },
806 }, {
807 'info_dict': {
808 'id': '6h8e8xoXJzg',
809 'ext': 'mp4',
810 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
811 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 812 'duration': 7337,
cf7e015f
S
813 'upload_date': '20150721',
814 'uploader': 'Beer Games Beer',
815 'uploader_id': 'beergamesbeer',
ec85ded8 816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 817 'license': 'Standard YouTube License',
cf7e015f
S
818 },
819 }, {
820 'info_dict': {
821 'id': 'PUOgX5z9xZw',
822 'ext': 'mp4',
823 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
824 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 825 'duration': 7337,
cf7e015f
S
826 'upload_date': '20150721',
827 'uploader': 'Beer Games Beer',
828 'uploader_id': 'beergamesbeer',
ec85ded8 829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 830 'license': 'Standard YouTube License',
cf7e015f
S
831 },
832 }, {
833 'info_dict': {
834 'id': 'teuwxikvS5k',
835 'ext': 'mp4',
836 'title': 'teamPGP: Rocket League Noob Stream (zim)',
837 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 838 'duration': 7334,
cf7e015f
S
839 'upload_date': '20150721',
840 'uploader': 'Beer Games Beer',
841 'uploader_id': 'beergamesbeer',
ec85ded8 842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 843 'license': 'Standard YouTube License',
cf7e015f
S
844 },
845 }],
846 'params': {
847 'skip_download': True,
848 },
4fe54c12 849 'skip': 'This video is not available.',
cbaed4bb 850 },
f9f49d87 851 {
067aa17e 852 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
853 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
854 'info_dict': {
855 'id': 'gVfLd0zydlo',
856 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
857 },
858 'playlist_count': 2,
be49068d 859 'skip': 'Not multifeed anymore',
f9f49d87 860 },
cbaed4bb 861 {
2d3d2997 862 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 863 'only_matching': True,
0e49d9a6 864 },
6d4fc66b 865 {
2d3d2997 866 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
867 'only_matching': True,
868 },
0e49d9a6 869 {
067aa17e 870 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 871 # Also tests cut-off URL expansion in video description (see
067aa17e
S
872 # https://github.com/ytdl-org/youtube-dl/issues/1892,
873 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
874 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
875 'info_dict': {
876 'id': 'lsguqyKfVQg',
877 'ext': 'mp4',
878 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 879 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 880 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 881 'duration': 133,
0e49d9a6
LL
882 'upload_date': '20151119',
883 'uploader_id': 'IronSoulElf',
ec85ded8 884 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 885 'uploader': 'IronSoulElf',
eb6793ba
S
886 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
887 'track': 'Dark Walk - Position Music',
888 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 889 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
890 },
891 'params': {
892 'skip_download': True,
893 },
894 },
61f92af1 895 {
067aa17e 896 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
897 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
898 'only_matching': True,
899 },
313dfc45
LL
900 {
901 # Video with yt:stretch=17:0
902 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
903 'info_dict': {
904 'id': 'Q39EVAstoRM',
905 'ext': 'mp4',
906 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
907 'description': 'md5:ee18a25c350637c8faff806845bddee9',
908 'upload_date': '20151107',
909 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
910 'uploader': 'CH GAMER DROID',
911 },
912 'params': {
913 'skip_download': True,
914 },
be49068d 915 'skip': 'This video does not exist.',
313dfc45 916 },
7caf9830
S
917 {
918 # Video licensed under Creative Commons
919 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
920 'info_dict': {
921 'id': 'M4gD1WSo5mA',
922 'ext': 'mp4',
923 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
924 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 925 'duration': 721,
7caf9830
S
926 'upload_date': '20150127',
927 'uploader_id': 'BerkmanCenter',
ec85ded8 928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 929 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
930 'license': 'Creative Commons Attribution license (reuse allowed)',
931 },
932 'params': {
933 'skip_download': True,
934 },
935 },
fd050249
S
936 {
937 # Channel-like uploader_url
938 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
939 'info_dict': {
940 'id': 'eQcmzGIKrzg',
941 'ext': 'mp4',
942 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
943 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 944 'duration': 4060,
fd050249 945 'upload_date': '20151119',
eb6793ba 946 'uploader': 'Bernie Sanders',
fd050249 947 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
949 'license': 'Creative Commons Attribution license (reuse allowed)',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 },
040ac686
S
955 {
956 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
957 'only_matching': True,
7f29cf54
S
958 },
959 {
067aa17e 960 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
961 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
962 'only_matching': True,
6496ccb4
S
963 },
964 {
965 # Rental video preview
966 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
967 'info_dict': {
968 'id': 'uGpuVWrhIzE',
969 'ext': 'mp4',
970 'title': 'Piku - Trailer',
971 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
972 'upload_date': '20150811',
973 'uploader': 'FlixMatrix',
974 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 975 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
976 'license': 'Standard YouTube License',
977 },
978 'params': {
979 'skip_download': True,
980 },
eb6793ba 981 'skip': 'This video is not available.',
022a5d66 982 },
12afdc2a
S
983 {
984 # YouTube Red video with episode data
985 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
986 'info_dict': {
987 'id': 'iqKdEhx-dD4',
988 'ext': 'mp4',
989 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 990 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 991 'duration': 2085,
12afdc2a
S
992 'upload_date': '20170118',
993 'uploader': 'Vsauce',
994 'uploader_id': 'Vsauce',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
996 'series': 'Mind Field',
997 'season_number': 1,
998 'episode_number': 1,
999 },
1000 'params': {
1001 'skip_download': True,
1002 },
1003 'expected_warnings': [
1004 'Skipping DASH manifest',
1005 ],
1006 },
c7121fa7
S
1007 {
1008 # The following content has been identified by the YouTube community
1009 # as inappropriate or offensive to some audiences.
1010 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1011 'info_dict': {
1012 'id': '6SJNVb0GnPI',
1013 'ext': 'mp4',
1014 'title': 'Race Differences in Intelligence',
1015 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1016 'duration': 965,
1017 'upload_date': '20140124',
1018 'uploader': 'New Century Foundation',
1019 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1021 },
1022 'params': {
1023 'skip_download': True,
1024 },
1025 },
022a5d66
S
1026 {
1027 # itag 212
1028 'url': '1t24XAntNCY',
1029 'only_matching': True,
fd5c4aab
S
1030 },
1031 {
1032 # geo restricted to JP
1033 'url': 'sJL6WA-aGkQ',
1034 'only_matching': True,
1035 },
cd5a74a2
S
1036 {
1037 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1038 'only_matching': True,
1039 },
825cd268
RA
1040 {
1041 # DRM protected
1042 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1043 'only_matching': True,
4fe54c12
S
1044 },
1045 {
1046 # Video with unsupported adaptive stream type formats
1047 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1048 'info_dict': {
1049 'id': 'Z4Vy8R84T1U',
1050 'ext': 'mp4',
1051 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1052 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1053 'duration': 433,
1054 'upload_date': '20130923',
1055 'uploader': 'Amelia Putri Harwita',
1056 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1057 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1058 'formats': 'maxcount:10',
1059 },
1060 'params': {
1061 'skip_download': True,
1062 'youtube_include_dash_manifest': False,
1063 },
5429d6a9 1064 'skip': 'not actual anymore',
5caabd3c 1065 },
1066 {
822b9d9c 1067 # Youtube Music Auto-generated description
5caabd3c 1068 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1069 'info_dict': {
1070 'id': 'MgNrAu2pzNs',
1071 'ext': 'mp4',
1072 'title': 'Voyeur Girl',
1073 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1074 'upload_date': '20190312',
5429d6a9
S
1075 'uploader': 'Stephen - Topic',
1076 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1077 'artist': 'Stephen',
1078 'track': 'Voyeur Girl',
1079 'album': 'it\'s too much love to know my dear',
1080 'release_date': '20190313',
1081 'release_year': 2019,
1082 },
1083 'params': {
1084 'skip_download': True,
1085 },
1086 },
66b48727
RA
1087 {
1088 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1089 'only_matching': True,
1090 },
011e75e6
S
1091 {
1092 # invalid -> valid video id redirection
1093 'url': 'DJztXj2GPfl',
1094 'info_dict': {
1095 'id': 'DJztXj2GPfk',
1096 'ext': 'mp4',
1097 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1098 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1099 'upload_date': '20090125',
1100 'uploader': 'Prochorowka',
1101 'uploader_id': 'Prochorowka',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1103 'artist': 'Panjabi MC',
1104 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1105 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1106 },
1107 'params': {
1108 'skip_download': True,
1109 },
ea74e00b
DP
1110 },
1111 {
1112 # empty description results in an empty string
1113 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1114 'info_dict': {
1115 'id': 'x41yOUIvK2k',
1116 'ext': 'mp4',
1117 'title': 'IMG 3456',
1118 'description': '',
1119 'upload_date': '20170613',
1120 'uploader_id': 'ElevageOrVert',
1121 'uploader': 'ElevageOrVert',
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
1126 },
a0566bbf 1127 {
29f7c58a 1128 # with '};' inside yt initial data (see [1])
1129 # see [2] for an example with '};' inside ytInitialPlayerResponse
1130 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1131 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1132 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1133 'info_dict': {
1134 'id': 'CHqg6qOn4no',
1135 'ext': 'mp4',
1136 'title': 'Part 77 Sort a list of simple types in c#',
1137 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1138 'upload_date': '20130831',
1139 'uploader_id': 'kudvenkat',
1140 'uploader': 'kudvenkat',
1141 },
1142 'params': {
1143 'skip_download': True,
1144 },
1145 },
29f7c58a 1146 {
1147 # another example of '};' in ytInitialData
1148 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1149 'only_matching': True,
1150 },
1151 {
1152 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1153 'only_matching': True,
1154 },
2eb88d95
PH
1155 ]
1156
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor, forwarding all arguments to the parent class."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # In-memory store of signature functions; _decrypt_signature fills it,
    # keyed by (player URL, signature cache id).
    self._player_cache = {}
e0df6211 1160
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1164
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1168
def report_unavailable_format(self, video_id, format):
    """Print a notice that the requested format is not available."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
1172
def report_rtmp_download(self):
    """Print a notice that the download will go through RTMP."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1176
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the shape of a signature as dot-joined part lengths.

    The signature is split on '.', and the length of every part is
    rendered as text and joined with '.' again.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1180
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ('ext', 'id') groups identifying the player at player_url.

    The patterns in cls._PLAYER_INFO_RE are tried in order; the first one
    that matches supplies both groups.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('ext'), found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
1190
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like example_sig.

    The downloader cache ('youtube-sigfuncs' section) is consulted first; a
    cached entry is a list of source indices that is replayed directly.
    Otherwise the player is downloaded, parsed as JS or SWF according to its
    type, and the resulting function is recorded in the cache as an index
    list before being returned.
    """
    player_type, player_id = self._extract_player_info(player_url)

    # Read from filesystem cache. The id must be a bare name without any
    # path component.
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    sig_cache = self._downloader.cache
    stored_spec = sig_cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    if self._downloader.params.get('verbose'):
        download_note = 'Downloading player %s' % player_url
    else:
        download_note = 'Downloading %s player %s' % (player_type, player_id)
    failure_note = 'Download of %s failed' % player_url

    if player_type == 'js':
        js_code = self._download_webpage(
            player_url, video_id,
            note=download_note, errnote=failure_note)
        res = self._parse_sig_js(js_code)
    elif player_type == 'swf':
        handle = self._request_webpage(
            player_url, video_id,
            note=download_note, errnote=failure_note)
        res = self._parse_sig_swf(handle.read())
    else:
        assert False, 'Invalid player type %r' % player_type

    # Run the function over a string whose i-th character has code point i;
    # the code points of the output then tell which input position each
    # output character came from.
    probe = ''.join(compat_chr(code) for code in range(len(example_sig)))
    index_spec = [ord(ch) for ch in res(probe)]

    sig_cache.store('youtube-sigfuncs', func_id, index_spec)
    return res
1230
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is probed with a string whose i-th character has code point i; the
    resulting index list is rendered as a sum of 's[...]' index and slice
    expressions, guarded by a check on the part lengths of example_sig.
    """
    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive, moving by stride) as a slice.
        lower = '' if first == 0 else str(first)
        bound = last + stride
        upper = ':' if bound < 0 else (':%d' % bound)
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Placeholder only - run_start is always assigned together with stride
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: either it goes on, or it ends at prev.
                if delta != stride:
                    yield _slice_expr(run_start, prev, stride)
                    stride = None
                continue
            if delta in [-1, 1]:
                stride = delta
                run_start = prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(compat_chr(code) for code in range(len(example_sig)))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1269
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The name of the player's signature function is located with the regexes
    below (tried in order), the function is extracted with JSInterpreter,
    and a one-argument wrapper around it is returned.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: js_function([s])
1290
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped into a one-argument callable.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')
    # The extracted function takes its arguments as a list.
    return lambda s: decipher([s])
1297
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    player_url may be protocol-relative or relative to www.youtube.com; it
    is made absolute before use. Signature functions are kept in
    self._player_cache per (player URL, signature cache id).

    Raises ExtractorError if player_url is None, or if anything goes wrong
    while obtaining or applying the signature function (the traceback is
    embedded in the message). age_gate is accepted but not used here.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        details = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + details, cause=e)
e0df6211 1324
def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
    """Return a dict mapping language codes to lists of subtitle formats.

    The track list is fetched from the timedtext service; for every language
    (first track wins) one entry per format in self._SUBTITLE_FORMATS is
    produced. When has_live_chat_replay is true a 'live_chat' pseudo
    language is added. An empty dict is returned, with a warning, when the
    list cannot be downloaded or nothing was found. webpage is not used
    here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep only the first track seen for a language.
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if has_live_chat_replay:
        subtitles['live_chat'] = [{
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat_replay',
        }]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1364
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config JSON object from webpage.

    Returns the parsed object, or None when the config is not found in the
    page (JSON parsing itself is non-fatal).
    """
    # User data may contain arbitrary character sequences that can break
    # regex-based JSON extraction: with '};' inside the data the second
    # pattern stops too early. As a workaround the more specific pattern is
    # tried first, until proper quoted string handling is implemented (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1382
def _get_automatic_captions(self, video_id, player_response, player_config):
    """Return automatic captions as a dict of language code -> format list.

    Sources are tried in this order:
      1. 'ttsurl' from the player config args (list of translation targets
         is downloaded from that URL),
      2. the caption track list of player_response,
      3. 'caption_tracks' / 'caption_translation_languages' from the player
         config args.

    Every format entry is a dict with 'url' and 'ext' keys, one per format in
    self._SUBTITLE_FORMATS. An empty dict is returned, after a warning, when
    neither player_response nor player_config is given or when the needed
    data is missing.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not (player_response or player_config):
        self._downloader.report_warning(err_msg)
        return {}
    try:
        # The config may lack 'args' altogether; normalise to an empty dict
        # so that the lookups below fail with KeyError (handled at the
        # bottom) instead of AttributeError on None.
        args = (player_config.get('args') if player_config else None) or {}
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Derive one URL per (language, format) pair from sub_url by
            # overriding its 'tlang' and 'fmt' query parameters.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        if player_response:
            renderer = player_response['captions']['playerCaptionsTracklistRenderer']
            base_url = renderer['captionTracks'][0]['baseUrl']
            sub_lang_list = []
            for lang in renderer['translationLanguages']:
                lang_code = lang.get('languageCode')
                if lang_code:
                    sub_lang_list.append(lang_code)
            return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1479
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video counts as watched.

    The URL is taken from player_response, falling back to video_info; when
    neither yields a valid URL nothing is done. A failed request is not
    fatal.
    """
    candidate = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not candidate:
        candidate = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(candidate)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1505
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect references to YouTube videos embedded in ``webpage``.

    Three kinds of markup are scanned, in this order:

    * embedded players (iframe/embed/object tags, ``data-video-url``
      attributes, ``embedSWF(`` and ``new SWFObject(`` calls) - the quoted
      youtube.com / youtube-nocookie.com URL is collected;
    * lazyYT elements - the ``data-youtube-id`` attribute value is collected;
    * the Wordpress "YouTube Video Importer" plugin - the ``data-video_id``
      attribute value is collected.

    Returns a list of strings (possibly empty).
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*`` which
    # demanded a literal colon after an optional parenthesis and therefore
    # never matched an actual ``embedSWF("...")`` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() yields one tuple per match (q1, q2, id); keep the id only.
    entries.extend(m[-1] for m in matches)

    return entries
1537
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls`` or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1542
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``cls._VALID_URL`` (a verbose regex) matched on ``url``.

    Raises ExtractorError when ``url`` does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1550
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build the chapter list from the initial-data JSON embedded in ``webpage``.

    Returns a list of dicts with ``start_time``, ``end_time`` and ``title``
    keys, or None when the webpage is empty, the initial data is missing or
    not a dict, or it carries no chapter list.  A chapter ends where the
    following one starts; the last chapter ends at ``duration``.  Chapters
    whose start or end cannot be determined are left out.
    """
    if not webpage:
        return
    initial_data = self._extract_yt_initial_data(video_id, webpage)
    if not (initial_data and isinstance(initial_data, dict)):
        return
    raw_chapters = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(raw):
        # Start offset is given in milliseconds; convert to seconds.
        millis = try_get(
            raw,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, raw in enumerate(raw_chapters):
        begin = start_of(raw)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_of(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                raw, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1595
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Derive chapters from seekTo timestamp links in an HTML description.

    ``description`` is the description markup; every line (delimited by
    ``<br />``) containing an anchor whose ``onclick`` calls
    ``yt.www.watch.player.seekTo`` is treated as one chapter.

    ``duration`` is the video length in seconds and may be None.  When it is
    known, chapter times are clamped to it and parsing stops at the first
    chapter starting beyond it.  When it is None those comparisons are
    skipped (they would raise TypeError on Python 3) and the final chapter,
    whose end cannot be determined, is left out - the same policy
    ``_extract_chapters_from_json`` applies to an unknown end time.

    Returns a list of dicts with ``start_time``, ``end_time`` and ``title``
    keys, or None when there is no description or no chapter line.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    duration_known = duration is not None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        if duration_known and start_time > duration:
            break
        # A chapter lasts until the next timestamp; the last one until the
        # end of the video.
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration_known and end_time > duration:
            end_time = duration
        # Timestamps going backwards: stop processing further lines.
        if start_time > end_time:
            break
        # The title is the line without the timestamp link itself.
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1630
84213ea8
S
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return chapters from the page JSON, else from the description.

    The description-based extraction is only attempted when the JSON-based
    one returns a falsy result (None or an empty list).
    """
    from_json = self._extract_chapters_from_json(webpage, video_id, duration)
    if from_json:
        return from_json
    return self._extract_chapters_from_description(description, duration)
1634
c5e8d7af 1635 def _real_extract(self, url):
cf7e015f
S
1636 url, smuggled_data = unsmuggle_url(url, {})
1637
7e8c0af0 1638 proto = (
78caa52a
PH
1639 'http' if self._downloader.params.get('prefer_insecure', False)
1640 else 'https')
7e8c0af0 1641
7c80519c 1642 start_time = None
297a564b 1643 end_time = None
7c80519c
JMF
1644 parsed_url = compat_urllib_parse_urlparse(url)
1645 for component in [parsed_url.fragment, parsed_url.query]:
1646 query = compat_parse_qs(component)
297a564b 1647 if start_time is None and 't' in query:
7c80519c 1648 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1649 if start_time is None and 'start' in query:
1650 start_time = parse_duration(query['start'][0])
297a564b
JMF
1651 if end_time is None and 'end' in query:
1652 end_time = parse_duration(query['end'][0])
7c80519c 1653
c5e8d7af
PH
1654 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1655 mobj = re.search(self._NEXT_URL_RE, url)
1656 if mobj:
7fd002c0 1657 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1658 video_id = self.extract_id(url)
c5e8d7af
PH
1659
1660 # Get video webpage
aa79ac0c 1661 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1662 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1663
1664 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1665 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1666
1667 # Attempt to extract SWF player URL
e0df6211 1668 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1669 if mobj is not None:
1670 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1671 else:
1672 player_url = None
1673
d8d24a92
S
1674 dash_mpds = []
1675
1676 def add_dash_mpd(video_info):
1677 dash_mpd = video_info.get('dashmpd')
1678 if dash_mpd and dash_mpd[0] not in dash_mpds:
1679 dash_mpds.append(dash_mpd[0])
1680
561b456e
S
1681 def add_dash_mpd_pr(pl_response):
1682 dash_mpd = url_or_none(try_get(
1683 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1684 compat_str))
1685 if dash_mpd and dash_mpd not in dash_mpds:
1686 dash_mpds.append(dash_mpd)
1687
c7121fa7
S
1688 is_live = None
1689 view_count = None
1690
1691 def extract_view_count(v_info):
1692 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1693
c2d125d9
S
1694 def extract_player_response(player_response, video_id):
1695 pl_response = str_or_none(player_response)
1696 if not pl_response:
1697 return
1698 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1699 if isinstance(pl_response, dict):
1700 add_dash_mpd_pr(pl_response)
1701 return pl_response
1702
fb2c9277
U
1703 def extract_embedded_config(embed_webpage, video_id):
1704 embedded_config = self._search_regex(
1705 r'setConfig\(({.*})\);',
1706 embed_webpage, 'ytInitialData', default=None)
1707 if embedded_config:
1708 return embedded_config
1709
62d80ba1 1710 video_info = {}
dbdaaa23 1711 player_response = {}
62d80ba1 1712 ytplayer_config = None
1713 embed_webpage = None
dbdaaa23 1714
c5e8d7af 1715 # Get video info
39e7107d
U
1716 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1717 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1718 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1719 age_gate = True
1720 # We simulate the access to the video from www.youtube.com/v/{video_id}
1721 # this can be viewed without login into Youtube
beb95e77
CL
1722 url = proto + '://www.youtube.com/embed/%s' % video_id
1723 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
fb2c9277
U
1724 ext = extract_embedded_config(embed_webpage, video_id)
1725 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1726 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1727 if not playable_in_embed:
1728 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1729 playable_in_embed = ''
1730 else:
1731 playable_in_embed = playable_in_embed.group('playableinEmbed')
1732 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1733 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1734 if playable_in_embed == 'false':
c73baf23
U
1735 '''
1736 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1737 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1738 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1739 '''
1740 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1741 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1742 age_gate = False
1743 # Try looking directly into the video webpage
1744 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1745 if ytplayer_config:
59c5fa91
PO
1746 args = ytplayer_config.get("args")
1747 if args is not None:
1748 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1749 # Convert to the same format returned by compat_parse_qs
1750 video_info = dict((k, [v]) for k, v in args.items())
1751 add_dash_mpd(video_info)
1752 # Rental video is not rented but preview is available (e.g.
1753 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1754 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1755 if not video_info and args.get('ypc_vid'):
1756 return self.url_result(
1757 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1758 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1759 is_live = True
1760 if not player_response:
1761 player_response = extract_player_response(args.get('player_response'), video_id)
1762 elif not player_response:
1763 player_response = ytplayer_config
4bb9c880
U
1764 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1765 add_dash_mpd_pr(player_response)
9d9314cb
U
1766 else:
1767 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1768 else:
1769 data = compat_urllib_parse_urlencode({
1770 'video_id': video_id,
1771 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1772 'sts': self._search_regex(
1773 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1774 })
1775 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1776 try:
1777 video_info_webpage = self._download_webpage(
1778 video_info_url, video_id,
1779 note='Refetching age-gated info webpage',
1780 errnote='unable to download video info webpage')
1781 except ExtractorError:
1782 video_info_webpage = None
1783 if video_info_webpage:
1784 video_info = compat_parse_qs(video_info_webpage)
1785 pl_response = video_info.get('player_response', [None])[0]
1786 player_response = extract_player_response(pl_response, video_id)
1787 add_dash_mpd(video_info)
1788 view_count = extract_view_count(video_info)
c108eb73
JMF
1789 else:
1790 age_gate = False
d8d24a92 1791 # Try looking directly into the video webpage
a72778d3 1792 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
8bdd16b4 1793 if ytplayer_config:
1794 args = ytplayer_config.get('args', {})
4c76aa06 1795 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1796 # Convert to the same format returned by compat_parse_qs
1797 video_info = dict((k, [v]) for k, v in args.items())
1798 add_dash_mpd(video_info)
6496ccb4
S
1799 # Rental video is not rented but preview is available (e.g.
1800 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1801 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1802 if not video_info and args.get('ypc_vid'):
1803 return self.url_result(
1804 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1805 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1806 is_live = True
dbdaaa23 1807 if not player_response:
c2d125d9 1808 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1809 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1810 add_dash_mpd_pr(player_response)
bbb7c3f7 1811
8bdd16b4 1812 if not video_info and not player_response:
1813 player_response = extract_player_response(
1814 self._search_regex(
29f7c58a 1815 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1816 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
8bdd16b4 1817 'initial player response', default='{}'),
1818 video_id)
1819
bbb7c3f7 1820 def extract_unavailable_message():
0add33ab
S
1821 messages = []
1822 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1823 msg = self._html_search_regex(
1824 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1825 video_webpage, 'unavailable %s' % kind, default=None)
1826 if msg:
1827 messages.append(msg)
1828 if messages:
1829 return '\n'.join(messages)
bbb7c3f7 1830
f93abcf1 1831 if not video_info and not player_response:
15be3eb5
RA
1832 unavailable_message = extract_unavailable_message()
1833 if not unavailable_message:
1834 unavailable_message = 'Unable to extract video data'
1835 raise ExtractorError(
1836 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837
f93abcf1
S
1838 if not isinstance(video_info, dict):
1839 video_info = {}
1840
5ac23244 1841 playable_in_embed = try_get(
1842 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1843
dbdaaa23
S
1844 video_details = try_get(
1845 player_response, lambda x: x['videoDetails'], dict) or {}
1846
37357d21
S
1847 microformat = try_get(
1848 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1849
8dbf751a
RA
1850 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1851 if not video_title:
cf7e015f
S
1852 self._downloader.report_warning('Unable to extract video title')
1853 video_title = '_'
1854
9cafc3fd 1855 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1856 if video_description:
fa4bc6e7
RA
1857
1858 def replace_url(m):
1859 redir_url = compat_urlparse.urljoin(url, m.group(1))
1860 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1861 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1862 qs = compat_parse_qs(parsed_redir_url.query)
1863 q = qs.get('q')
1864 if q and q[0]:
1865 return q[0]
1866 return redir_url
1867
9cafc3fd 1868 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1869 <a\s+
25cb7a0e 1870 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1871 (?:title|href)="([^"]+)"\s+
25cb7a0e 1872 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1873 class="[^"]*"[^>]*>
23f13e97 1874 [^<]+\.{3}\s*
cf7e015f 1875 </a>
fa4bc6e7 1876 ''', replace_url, video_description)
cf7e015f
S
1877 video_description = clean_html(video_description)
1878 else:
ea74e00b
DP
1879 video_description = video_details.get('shortDescription')
1880 if video_description is None:
1881 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 1882
8fe10494 1883 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1884 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1885 multifeed_metadata_list = try_get(
1886 player_response,
1887 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1888 compat_str) or try_get(
1889 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1890 if multifeed_metadata_list:
1891 entries = []
1892 feed_ids = []
1893 for feed in multifeed_metadata_list.split(','):
1894 # Unquote should take place before split on comma (,) since textual
1895 # fields may contain comma as well (see
067aa17e 1896 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1897 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1898
1899 def feed_entry(name):
1900 return try_get(feed_data, lambda x: x[name][0], compat_str)
1901
1902 feed_id = feed_entry('id')
1903 if not feed_id:
1904 continue
1905 feed_title = feed_entry('title')
1906 title = video_title
1907 if feed_title:
1908 title += ' (%s)' % feed_title
8fe10494
S
1909 entries.append({
1910 '_type': 'url_transparent',
1911 'ie_key': 'Youtube',
1912 'url': smuggle_url(
1913 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1914 {'force_singlefeed': True}),
6b09401b 1915 'title': title,
8fe10494 1916 })
6b09401b 1917 feed_ids.append(feed_id)
8fe10494
S
1918 self.to_screen(
1919 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1920 % (', '.join(feed_ids), video_id))
1921 return self.playlist_result(entries, video_id, video_title, video_description)
1922 else:
1923 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1924
c7121fa7 1925 if view_count is None:
1c9c8de2 1926 view_count = extract_view_count(video_info)
dbdaaa23
S
1927 if view_count is None and video_details:
1928 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
1929 if view_count is None and microformat:
1930 view_count = int_or_none(microformat.get('viewCount'))
1d699755 1931
27019dbb 1932 if is_live is None:
898238e9 1933 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1934
321bf820 1935 has_live_chat_replay = False
f0f76a33 1936 if not is_live:
82e3f6eb 1937 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
321bf820 1938 try:
1939 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1940 has_live_chat_replay = True
f0f76a33 1941 except (KeyError, IndexError, TypeError):
321bf820 1942 pass
1943
c5e8d7af
PH
1944 # Check for "rental" videos
1945 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1946 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1947
c63ca0ee
S
1948 def _extract_filesize(media_url):
1949 return int_or_none(self._search_regex(
1950 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1951
bf1317d2
S
1952 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1953 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1954
c5e8d7af
PH
1955 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1956 self.report_rtmp_download()
dd27fd17
PH
1957 formats = [{
1958 'format_id': '_rtmp',
1959 'protocol': 'rtmp',
1960 'url': video_info['conn'][0],
1961 'player_url': player_url,
1962 }]
bf1317d2 1963 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1964 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1965 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1966 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1967 formats = []
3318832e 1968 formats_spec = {}
82156fdb 1969 fmt_list = video_info.get('fmt_list', [''])[0]
1970 if fmt_list:
1971 for fmt in fmt_list.split(','):
1972 spec = fmt.split('/')
3318832e 1973 if len(spec) > 1:
1974 width_height = spec[1].split('x')
1975 if len(width_height) == 2:
1976 formats_spec[spec[0]] = {
1977 'resolution': spec[1],
1978 'width': int_or_none(width_height[0]),
1979 'height': int_or_none(width_height[1]),
1980 }
bf1317d2
S
1981 for fmt in streaming_formats:
1982 itag = str_or_none(fmt.get('itag'))
1983 if not itag:
201e9eaa 1984 continue
bf1317d2
S
1985 quality = fmt.get('quality')
1986 quality_label = fmt.get('qualityLabel') or quality
1987 formats_spec[itag] = {
1988 'asr': int_or_none(fmt.get('audioSampleRate')),
1989 'filesize': int_or_none(fmt.get('contentLength')),
1990 'format_note': quality_label,
1991 'fps': int_or_none(fmt.get('fps')),
1992 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1993 # bitrate for itag 43 is always 2147483647
1994 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1995 'width': int_or_none(fmt.get('width')),
1996 }
1997
1998 for fmt in streaming_formats:
00eb865b 1999 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2000 continue
2001 url = url_or_none(fmt.get('url'))
2002
2003 if not url:
fa3db383 2004 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2005 if not cipher:
2006 continue
2007 url_data = compat_parse_qs(cipher)
2008 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2009 if not url:
2010 continue
2011 else:
2012 cipher = None
2013 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2014
2f483bc1
S
2015 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2016 # Unsupported FORMAT_STREAM_TYPE_OTF
2017 if stream_type == 3:
2018 continue
6449cd80 2019
bf1317d2
S
2020 format_id = fmt.get('itag') or url_data['itag'][0]
2021 if not format_id:
2022 continue
2023 format_id = compat_str(format_id)
a49eccdf 2024
bf1317d2
S
2025 if cipher:
2026 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
8bdd16b4 2027 ASSETS_RE = (
2028 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2029 r'"jsUrl"\s*:\s*("[^"]+")',
2030 r'"assets":.+?"js":\s*("[^"]+")')
bf1317d2
S
2031 jsplayer_url_json = self._search_regex(
2032 ASSETS_RE,
2033 embed_webpage if age_gate else video_webpage,
2034 'JS player URL (1)', default=None)
2035 if not jsplayer_url_json and not age_gate:
2036 # We need the embed website after all
2037 if embed_webpage is None:
2038 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2039 embed_webpage = self._download_webpage(
2040 embed_url, video_id, 'Downloading embed webpage')
2041 jsplayer_url_json = self._search_regex(
2042 ASSETS_RE, embed_webpage, 'JS player URL')
2043
2044 player_url = json.loads(jsplayer_url_json)
cf010131 2045 if player_url is None:
bf1317d2
S
2046 player_url_json = self._search_regex(
2047 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2048 video_webpage, 'age gate player URL')
2049 player_url = json.loads(player_url_json)
2050
2051 if 'sig' in url_data:
2052 url += '&signature=' + url_data['sig'][0]
2053 elif 's' in url_data:
2054 encrypted_sig = url_data['s'][0]
2055
2056 if self._downloader.params.get('verbose'):
2057 if player_url is None:
bf1317d2 2058 player_desc = 'unknown'
cf010131 2059 else:
e40c758c
S
2060 player_type, player_version = self._extract_player_info(player_url)
2061 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2062 parts_sizes = self._signature_cache_id(encrypted_sig)
2063 self.to_screen('{%s} signature length %s, %s' %
2064 (format_id, parts_sizes, player_desc))
2065
2066 signature = self._decrypt_signature(
2067 encrypted_sig, video_id, player_url, age_gate)
2068 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2069 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2070 if 'ratebypass' not in url:
2071 url += '&ratebypass=yes'
c9afb51c 2072
94278f72
YCH
2073 dct = {
2074 'format_id': format_id,
2075 'url': url,
2076 'player_url': player_url,
2077 }
2078 if format_id in self._formats:
2079 dct.update(self._formats[format_id])
3318832e 2080 if format_id in formats_spec:
2081 dct.update(formats_spec[format_id])
94278f72 2082
aabc2be6 2083 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2084 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2085 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2086 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2087 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2088
bf1317d2
S
2089 if width is None:
2090 width = int_or_none(fmt.get('width'))
2091 if height is None:
2092 height = int_or_none(fmt.get('height'))
2093
c63ca0ee
S
2094 filesize = int_or_none(url_data.get(
2095 'clen', [None])[0]) or _extract_filesize(url)
2096
bf1317d2
S
2097 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2098 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2099
4878759f
S
2100 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2101 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2102 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2103
94278f72 2104 more_fields = {
c63ca0ee 2105 'filesize': filesize,
bf1317d2 2106 'tbr': tbr,
c9afb51c
AH
2107 'width': width,
2108 'height': height,
bf1317d2
S
2109 'fps': fps,
2110 'format_note': quality_label or quality,
c9afb51c 2111 }
94278f72
YCH
2112 for key, value in more_fields.items():
2113 if value:
2114 dct[key] = value
bf1317d2 2115 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2116 if type_:
2117 type_split = type_.split(';')
2118 kind_ext = type_split[0].split('/')
2119 if len(kind_ext) == 2:
94278f72
YCH
2120 kind, _ = kind_ext
2121 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2122 if kind in ('audio', 'video'):
2123 codecs = None
2124 for mobj in re.finditer(
2125 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2126 if mobj.group('key') == 'codecs':
2127 codecs = mobj.group('val')
2128 break
2129 if codecs:
6310acf5 2130 dct.update(parse_codecs(codecs))
e4a60912
S
2131 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2132 dct['downloader_options'] = {
2133 # Youtube throttles chunks >~10M
2134 'http_chunk_size': 10485760,
2135 }
aabc2be6 2136 formats.append(dct)
c5e8d7af 2137 else:
c3e54389
S
2138 manifest_url = (
2139 url_or_none(try_get(
2140 player_response,
2141 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2142 compat_str))
2143 or url_or_none(try_get(
c3e54389
S
2144 video_info, lambda x: x['hlsvp'][0], compat_str)))
2145 if manifest_url:
2146 formats = []
2147 m3u8_formats = self._extract_m3u8_formats(
2148 manifest_url, video_id, 'mp4', fatal=False)
2149 for a_format in m3u8_formats:
2150 itag = self._search_regex(
2151 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2152 if itag:
2153 a_format['format_id'] = itag
2154 if itag in self._formats:
2155 dct = self._formats[itag].copy()
2156 dct.update(a_format)
2157 a_format = dct
2158 a_format['player_url'] = player_url
2159 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2160 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2161 if self._downloader.params.get('youtube_include_hls_manifest', True):
2162 formats.append(a_format)
c3e54389 2163 else:
13577349 2164 error_message = extract_unavailable_message()
a0566bbf 2165 if not error_message:
2166 reason_list = try_get(
2167 player_response,
2168 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2169 list) or []
2170 for reason in reason_list:
2171 if not isinstance(reason, dict):
2172 continue
2173 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2174 if reason_text:
2175 if not error_message:
2176 error_message = ''
2177 error_message += reason_text
2178 if error_message:
2179 error_message = clean_html(error_message)
c3e54389 2180 if not error_message:
13577349
S
2181 error_message = clean_html(try_get(
2182 player_response, lambda x: x['playabilityStatus']['reason'],
2183 compat_str))
2184 if not error_message:
2185 error_message = clean_html(
2186 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2187 if error_message:
2188 raise ExtractorError(error_message, expected=True)
2189 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2190
7e72694b 2191 # uploader
dbdaaa23
S
2192 video_uploader = try_get(
2193 video_info, lambda x: x['author'][0],
2194 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2195 if video_uploader:
2196 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2197 else:
2198 self._downloader.report_warning('unable to extract uploader name')
2199
2200 # uploader_id
2201 video_uploader_id = None
2202 video_uploader_url = None
2203 mobj = re.search(
2204 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2205 video_webpage)
2206 if mobj is not None:
2207 video_uploader_id = mobj.group('uploader_id')
2208 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2209 else:
2210 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2211 if owner_profile_url:
2212 video_uploader_id = self._search_regex(
2213 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2214 default=None)
2215 video_uploader_url = owner_profile_url
7e72694b 2216
b45a9e69 2217 channel_id = (
3089bc74
S
2218 str_or_none(video_details.get('channelId'))
2219 or self._html_search_meta(
2220 'channelId', video_webpage, 'channel id', default=None)
2221 or self._search_regex(
b45a9e69 2222 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2223 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2224 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2225
b477fc13
S
2226 thumbnails = []
2227 thumbnails_list = try_get(
2228 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2229 for t in thumbnails_list:
2230 if not isinstance(t, dict):
2231 continue
2232 thumbnail_url = url_or_none(t.get('url'))
2233 if not thumbnail_url:
2234 continue
2235 thumbnails.append({
2236 'url': thumbnail_url,
2237 'width': int_or_none(t.get('width')),
2238 'height': int_or_none(t.get('height')),
2239 })
2240
2241 if not thumbnails:
7e72694b 2242 video_thumbnail = None
b477fc13
S
2243 # We try first to get a high quality image:
2244 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2245 video_webpage, re.DOTALL)
2246 if m_thumb is not None:
2247 video_thumbnail = m_thumb.group(1)
2248 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2249 if thumbnail_url:
2250 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2251 if video_thumbnail:
2252 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2253
2254 # upload date
2255 upload_date = self._html_search_meta(
2256 'datePublished', video_webpage, 'upload date', default=None)
2257 if not upload_date:
2258 upload_date = self._search_regex(
2259 [r'(?s)id="eow-date.*?>(.*?)</span>',
2260 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2261 video_webpage, 'upload date', default=None)
37357d21
S
2262 if not upload_date:
2263 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2264 upload_date = unified_strdate(upload_date)
2265
2266 video_license = self._html_search_regex(
2267 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2268 video_webpage, 'license', default=None)
2269
2270 m_music = re.search(
2271 r'''(?x)
2272 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2273 <ul[^>]*>\s*
2274 <li>(?P<title>.+?)
2275 by (?P<creator>.+?)
2276 (?:
2277 \(.+?\)|
2278 <a[^>]*
2279 (?:
2280 \bhref=["\']/red[^>]*>| # drop possible
2281 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2282 )
2283 .*?
2284 )?</li
2285 ''',
2286 video_webpage)
2287 if m_music:
2288 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2289 video_creator = clean_html(m_music.group('creator'))
2290 else:
2291 video_alt_title = video_creator = None
2292
2293 def extract_meta(field):
2294 return self._html_search_regex(
2295 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2296 video_webpage, field, default=None)
2297
2298 track = extract_meta('Song')
2299 artist = extract_meta('Artist')
92bc97d3 2300 album = extract_meta('Album')
822b9d9c
RA
2301
2302 # Youtube Music Auto-generated description
92bc97d3 2303 release_date = release_year = None
822b9d9c 2304 if video_description:
38d70284 2305 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c
RA
2306 if mobj:
2307 if not track:
2308 track = mobj.group('track').strip()
2309 if not artist:
2310 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2311 if not album:
2312 album = mobj.group('album'.strip())
822b9d9c
RA
2313 release_year = mobj.group('release_year')
2314 release_date = mobj.group('release_date')
2315 if release_date:
2316 release_date = release_date.replace('-', '')
2317 if not release_year:
2318 release_year = int(release_date[:4])
2319 if release_year:
2320 release_year = int(release_year)
7e72694b 2321
38d70284 2322 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2323 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2324 for content in contents:
2325 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2326 multiple_songs = False
2327 for row in rows:
2328 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2329 multiple_songs = True
2330 break
2331 for row in rows:
2332 mrr = row.get('metadataRowRenderer') or {}
2333 mrr_title = try_get(
2334 mrr, lambda x: x['title']['simpleText'], compat_str)
2335 mrr_contents = try_get(
2336 mrr, lambda x: x['contents'][0], dict) or {}
2337 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2338 if not (mrr_title and mrr_contents_text):
2339 continue
2340 if mrr_title == 'License':
2341 video_license = mrr_contents_text
2342 elif not multiple_songs:
2343 if mrr_title == 'Album':
2344 album = mrr_contents_text
2345 elif mrr_title == 'Artist':
2346 artist = mrr_contents_text
2347 elif mrr_title == 'Song':
2348 track = mrr_contents_text
9322f116 2349
7e72694b
S
2350 m_episode = re.search(
2351 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2352 video_webpage)
2353 if m_episode:
c2dd2dc0 2354 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2355 season_number = int(m_episode.group('season'))
2356 episode_number = int(m_episode.group('episode'))
2357 else:
2358 series = season_number = episode_number = None
2359
2360 m_cat_container = self._search_regex(
2361 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2362 video_webpage, 'categories', default=None)
dbeafce5 2363 category = None
7e72694b
S
2364 if m_cat_container:
2365 category = self._html_search_regex(
2366 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2367 default=None)
dbeafce5
S
2368 if not category:
2369 category = try_get(
2370 microformat, lambda x: x['category'], compat_str)
2371 video_categories = None if category is None else [category]
7e72694b
S
2372
2373 video_tags = [
2374 unescapeHTML(m.group('content'))
2375 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2376 if not video_tags:
2377 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2378
2379 def _extract_count(count_name):
2380 return str_to_int(self._search_regex(
a0566bbf 2381 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2382 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
7e72694b
S
2383 video_webpage, count_name, default=None))
2384
2385 like_count = _extract_count('like')
2386 dislike_count = _extract_count('dislike')
2387
dbdaaa23
S
2388 if view_count is None:
2389 view_count = str_to_int(self._search_regex(
2390 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2391 'view count', default=None))
2392
bf3c9326
S
2393 average_rating = (
2394 float_or_none(video_details.get('averageRating'))
2395 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2396
7e72694b 2397 # subtitles
321bf820 2398 video_subtitles = self.extract_subtitles(
2399 video_id, video_webpage, has_live_chat_replay)
29f7c58a 2400 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
7e72694b
S
2401
2402 video_duration = try_get(
2403 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2404 if not video_duration:
2405 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2406 if not video_duration:
2407 video_duration = parse_duration(self._html_search_meta(
2408 'duration', video_webpage, 'video duration'))
2409
b84071c0
JP
2410 # Get Subscriber Count of channel
2411 subscriber_count = parse_count(self._search_regex(
2412 r'"text":"([\d\.]+\w?) subscribers"',
2413 video_webpage,
2414 'subscriber count',
2415 default=None
2416 ))
2417
06167fbb 2418 # get xsrf for annotations or comments
2419 get_annotations = self._downloader.params.get('writeannotations', False)
2420 get_comments = self._downloader.params.get('getcomments', False)
2421 if get_annotations or get_comments:
29f7c58a 2422 xsrf_token = None
2423 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2424 if ytcfg:
2425 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2426 if not xsrf_token:
2427 xsrf_token = self._search_regex(
2428 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2429 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2430
2431 # annotations
2432 video_annotations = None
2433 if get_annotations:
64b6a4e9
RA
2434 invideo_url = try_get(
2435 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2436 if xsrf_token and invideo_url:
29f7c58a 2437 xsrf_field_name = None
2438 if ytcfg:
2439 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2440 if not xsrf_field_name:
2441 xsrf_field_name = self._search_regex(
2442 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2443 video_webpage, 'xsrf field name',
2444 group='xsrf_field_name', default='session_token')
64b6a4e9
RA
2445 video_annotations = self._download_webpage(
2446 self._proto_relative_url(invideo_url),
2447 video_id, note='Downloading annotations',
2448 errnote='Unable to download video annotations', fatal=False,
2449 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2450
84213ea8 2451 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2452
06167fbb 2453 # Get comments
2454 # TODO: Refactor and move to separate function
2455 if get_comments:
2456 expected_video_comment_count = 0
2457 video_comments = []
2458
2459 def find_value(html, key, num_chars=2, separator='"'):
2460 pos_begin = html.find(key) + len(key) + num_chars
2461 pos_end = html.find(separator, pos_begin)
2462 return html[pos_begin: pos_end]
2463
2464 def search_dict(partial, key):
2465 if isinstance(partial, dict):
2466 for k, v in partial.items():
2467 if k == key:
2468 yield v
2469 else:
2470 for o in search_dict(v, key):
2471 yield o
2472 elif isinstance(partial, list):
2473 for i in partial:
2474 for o in search_dict(i, key):
2475 yield o
2476
2477 try:
2478 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
8d0ea5f9 2479 continuations = [ncd['continuation']]
06167fbb 2480 # Handle videos where comments have been disabled entirely
2481 except StopIteration:
2482 continuations = []
2483
8d0ea5f9 2484 def get_continuation(continuation, session_token, replies=False):
06167fbb 2485 query = {
66c935fb 2486 'pbj': 1,
2487 'ctoken': continuation,
06167fbb 2488 }
2489 if replies:
2490 query['action_get_comment_replies'] = 1
2491 else:
2492 query['action_get_comments'] = 1
2493
2494 while True:
2495 content, handle = self._download_webpage_handle(
2496 'https://www.youtube.com/comment_service_ajax',
2497 video_id,
2498 note=False,
2499 expected_status=[413],
2500 data=urlencode_postdata({
2501 'session_token': session_token
2502 }),
2503 query=query,
2504 headers={
2505 'Accept': '*/*',
2506 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2507 'X-YouTube-Client-Name': '1',
2508 'X-YouTube-Client-Version': '2.20201202.06.01'
2509 }
2510 )
2511
2512 response_code = handle.getcode()
2513 if (response_code == 200):
2514 return self._parse_json(content, video_id)
8d0ea5f9 2515 if (response_code == 413):
06167fbb 2516 return None
2517 raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2518
2519 first_continuation = True
2520 while continuations:
2521 continuation, itct = continuations.pop()
8d0ea5f9 2522 comment_response = get_continuation(continuation, xsrf_token)
06167fbb 2523 if not comment_response:
2524 continue
2525 if list(search_dict(comment_response, 'externalErrorMessage')):
2526 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2527
8d0ea5f9
B
2528 if 'continuationContents' not in comment_response['response']:
2529 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2530 continue
2531 # not sure if this actually helps
2532 if 'xsrf_token' in comment_response:
2533 xsrf_token = comment_response['xsrf_token']
2534
06167fbb 2535 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2536 if first_continuation:
2537 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2538 first_continuation = False
2539 if 'contents' not in item_section:
2540 # continuation returned no comments?
2541 # set an empty array as to not break the for loop
2542 item_section['contents'] = []
2543
2544 for meta_comment in item_section['contents']:
2545 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2546 video_comments.append({
2547 'id': comment['commentId'],
2548 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
8d0ea5f9 2549 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2550 'author': comment.get('authorText', {}).get('simpleText', ''),
2551 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2552 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2553 'parent': 'root'
2554 })
2555 if 'replies' not in meta_comment['commentThreadRenderer']:
2556 continue
2557
8d0ea5f9
B
2558 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2559 while reply_continuations:
06167fbb 2560 time.sleep(1)
8d0ea5f9
B
2561 continuation = reply_continuations.pop()
2562 replies_data = get_continuation(continuation, xsrf_token, True)
06167fbb 2563 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2564 continue
06167fbb 2565
2566 if self._downloader.params.get('verbose', False):
2567 self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2568 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2569 for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2570 reply_comment = reply_meta['commentRenderer']
2571 video_comments.append({
2572 'id': reply_comment['commentId'],
2573 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2574 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2575 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2576 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2577 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2578 'parent': comment['commentId']
2579 })
2580 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2581 continue
06167fbb 2582
8d0ea5f9 2583 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2584
2585 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2586
2587 if 'continuations' in item_section:
8d0ea5f9 2588 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2589 time.sleep(1)
2590
2591 self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2592 else:
2593 expected_video_comment_count = None
2594 video_comments = None
2595
dd27fd17 2596 # Look for the DASH manifest
203fb43f 2597 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2598 dash_mpd_fatal = True
8ff648e4 2599 for mpd_url in dash_mpds:
d8d24a92 2600 dash_formats = {}
774e208f 2601 try:
05d0d131
YCH
2602 def decrypt_sig(mobj):
2603 s = mobj.group(1)
2604 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2605 return '/signature/%s' % dec_s
2606
8ff648e4 2607 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2608
8ff648e4 2609 for df in self._extract_mpd_formats(
2610 mpd_url, video_id, fatal=dash_mpd_fatal,
2611 formats_dict=self._formats):
c63ca0ee
S
2612 if not df.get('filesize'):
2613 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2614 # Do not overwrite DASH format found in some previous DASH manifest
2615 if df['format_id'] not in dash_formats:
2616 dash_formats[df['format_id']] = df
77c6fb5b
S
2617 # Additional DASH manifests may end up in HTTP Error 403 therefore
2618 # allow them to fail without bug report message if we already have
2619 # some DASH manifest succeeded. This is temporary workaround to reduce
2620 # burst of bug reports until we figure out the reason and whether it
2621 # can be fixed at all.
2622 dash_mpd_fatal = False
774e208f
PH
2623 except (ExtractorError, KeyError) as e:
2624 self.report_warning(
2625 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2626 if dash_formats:
04b3b3df
JMF
2627 # Remove the formats we found through non-DASH, they
2628 # contain less info and it can be wrong, because we use
2629 # fixed values (for example the resolution). See
067aa17e 2630 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2631 # example.
d80265cc 2632 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2633 formats.extend(dash_formats.values())
d80044c2 2634
6271f1ca
PH
2635 # Check for malformed aspect ratio
2636 stretched_m = re.search(
2637 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2638 video_webpage)
2639 if stretched_m:
313dfc45
LL
2640 w = float(stretched_m.group('w'))
2641 h = float(stretched_m.group('h'))
5faf9fed
S
2642 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2643 # We will only process correct ratios.
313dfc45 2644 if w > 0 and h > 0:
41f24c32 2645 ratio = w / h
313dfc45
LL
2646 for f in formats:
2647 if f.get('vcodec') != 'none':
2648 f['stretched_ratio'] = ratio
6271f1ca 2649
026fbedc 2650 if not formats:
43ebf77d
S
2651 if 'reason' in video_info:
2652 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2653 regions_allowed = self._html_search_meta(
2654 'regionsAllowed', video_webpage, default=None)
2655 countries = regions_allowed.split(',') if regions_allowed else None
2656 self.raise_geo_restricted(
2657 msg=video_info['reason'][0], countries=countries)
2658 reason = video_info['reason'][0]
2659 if 'Invalid parameters' in reason:
2660 unavailable_message = extract_unavailable_message()
2661 if unavailable_message:
2662 reason = unavailable_message
2663 raise ExtractorError(
2664 'YouTube said: %s' % reason,
2665 expected=True, video_id=video_id)
2666 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2667 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2668
4bcc7bd1 2669 self._sort_formats(formats)
4ea3be0a 2670
21c340b8 2671 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2672
4ea3be0a 2673 return {
8bcc8756
JW
2674 'id': video_id,
2675 'uploader': video_uploader,
2676 'uploader_id': video_uploader_id,
fd050249 2677 'uploader_url': video_uploader_url,
dd4c4492
S
2678 'channel_id': channel_id,
2679 'channel_url': channel_url,
8bcc8756 2680 'upload_date': upload_date,
7caf9830 2681 'license': video_license,
936784b2 2682 'creator': video_creator or artist,
8bcc8756 2683 'title': video_title,
936784b2 2684 'alt_title': video_alt_title or track,
b477fc13 2685 'thumbnails': thumbnails,
8bcc8756
JW
2686 'description': video_description,
2687 'categories': video_categories,
000b6b5a 2688 'tags': video_tags,
8bcc8756 2689 'subtitles': video_subtitles,
360e1ca5 2690 'automatic_captions': automatic_captions,
8bcc8756
JW
2691 'duration': video_duration,
2692 'age_limit': 18 if age_gate else 0,
2693 'annotations': video_annotations,
9cafc3fd 2694 'chapters': chapters,
7e8c0af0 2695 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2696 'view_count': view_count,
4ea3be0a 2697 'like_count': like_count,
2698 'dislike_count': dislike_count,
bf3c9326 2699 'average_rating': average_rating,
8bcc8756 2700 'formats': formats,
2fe1ff85 2701 'is_live': is_live,
7c80519c 2702 'start_time': start_time,
297a564b 2703 'end_time': end_time,
12afdc2a
S
2704 'series': series,
2705 'season_number': season_number,
2706 'episode_number': episode_number,
936784b2
S
2707 'track': track,
2708 'artist': artist,
5caabd3c 2709 'album': album,
2710 'release_date': release_date,
2711 'release_year': release_year,
b84071c0 2712 'subscriber_count': subscriber_count,
5ac23244 2713 'playable_in_embed': playable_in_embed,
06167fbb 2714 'comments': video_comments,
2715 'comment_count': expected_video_comment_count,
4ea3be0a 2716 }
c5e8d7af 2717
5f6a1245 2718
8bdd16b4 2719class YoutubeTabIE(YoutubeBaseInfoExtractor):
2720 IE_DESC = 'YouTube.com tab'
70d5c17b 2721 _VALID_URL = r'''(?x)
2722 https?://
2723 (?:\w+\.)?
2724 (?:
2725 youtube(?:kids)?\.com|
2726 invidio\.us
2727 )/
2728 (?:
2729 (?:channel|c|user)/|
2730 (?P<not_channel>
3d3dddc9 2731 feed/|
70d5c17b 2732 (?:playlist|watch)\?.*?\blist=
2733 )|
29f7c58a 2734 (?!(?:%s)\b) # Direct URLs
70d5c17b 2735 )
2736 (?P<id>[^/?\#&]+)
2737 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2738 IE_NAME = 'youtube:tab'
2739
81127aa5 2740 _TESTS = [{
8bdd16b4 2741 # playlists, multipage
2742 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2743 'playlist_mincount': 94,
2744 'info_dict': {
2745 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2746 'title': 'Игорь Клейнер - Playlists',
2747 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2748 },
2749 }, {
2750 # playlists, multipage, different order
2751 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2752 'playlist_mincount': 94,
2753 'info_dict': {
2754 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2755 'title': 'Игорь Клейнер - Playlists',
2756 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2757 },
2758 }, {
2759 # playlists, singlepage
2760 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2761 'playlist_mincount': 4,
2762 'info_dict': {
2763 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2764 'title': 'ThirstForScience - Playlists',
2765 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2766 }
2767 }, {
2768 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2769 'only_matching': True,
2770 }, {
2771 # basic, single video playlist
0e30a7b9 2772 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2773 'info_dict': {
0e30a7b9 2774 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2775 'uploader': 'Sergey M.',
2776 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2777 'title': 'youtube-dl public playlist',
81127aa5 2778 },
0e30a7b9 2779 'playlist_count': 1,
9291475f 2780 }, {
8bdd16b4 2781 # empty playlist
0e30a7b9 2782 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2783 'info_dict': {
0e30a7b9 2784 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2785 'uploader': 'Sergey M.',
2786 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2787 'title': 'youtube-dl empty playlist',
9291475f
PH
2788 },
2789 'playlist_count': 0,
2790 }, {
8bdd16b4 2791 # Home tab
2792 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2793 'info_dict': {
8bdd16b4 2794 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2795 'title': 'lex will - Home',
2796 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2797 },
8bdd16b4 2798 'playlist_mincount': 2,
9291475f 2799 }, {
8bdd16b4 2800 # Videos tab
2801 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2802 'info_dict': {
8bdd16b4 2803 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2804 'title': 'lex will - Videos',
2805 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2806 },
8bdd16b4 2807 'playlist_mincount': 975,
9291475f 2808 }, {
8bdd16b4 2809 # Videos tab, sorted by popular
2810 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2811 'info_dict': {
8bdd16b4 2812 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2813 'title': 'lex will - Videos',
2814 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2815 },
8bdd16b4 2816 'playlist_mincount': 199,
9291475f 2817 }, {
8bdd16b4 2818 # Playlists tab
2819 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2820 'info_dict': {
8bdd16b4 2821 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2822 'title': 'lex will - Playlists',
2823 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
9291475f 2824 },
8bdd16b4 2825 'playlist_mincount': 17,
ac7553d0 2826 }, {
8bdd16b4 2827 # Community tab
2828 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2829 'info_dict': {
8bdd16b4 2830 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2831 'title': 'lex will - Community',
2832 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2833 },
2834 'playlist_mincount': 18,
87dadd45 2835 }, {
8bdd16b4 2836 # Channels tab
2837 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2838 'info_dict': {
8bdd16b4 2839 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2840 'title': 'lex will - Channels',
2841 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2842 },
2843 'playlist_mincount': 138,
6b08cdf6 2844 }, {
a0566bbf 2845 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2846 'only_matching': True,
2847 }, {
a0566bbf 2848 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2849 'only_matching': True,
2850 }, {
a0566bbf 2851 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2852 'only_matching': True,
2853 }, {
2854 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2855 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2856 'info_dict': {
2857 'title': '29C3: Not my department',
2858 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2859 'uploader': 'Christiaan008',
2860 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2861 },
2862 'playlist_count': 96,
2863 }, {
2864 'note': 'Large playlist',
2865 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2866 'info_dict': {
8bdd16b4 2867 'title': 'Uploads from Cauchemar',
2868 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2869 'uploader': 'Cauchemar',
2870 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2871 },
8bdd16b4 2872 'playlist_mincount': 1123,
2873 }, {
2874 # even larger playlist, 8832 videos
2875 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2876 'only_matching': True,
4b7df0d3
JMF
2877 }, {
2878 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2879 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2880 'info_dict': {
acf757f4
PH
2881 'title': 'Uploads from Interstellar Movie',
2882 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2883 'uploader': 'Interstellar Movie',
8bdd16b4 2884 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2885 },
481cc733 2886 'playlist_mincount': 21,
8bdd16b4 2887 }, {
2888 # https://github.com/ytdl-org/youtube-dl/issues/21844
2889 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2890 'info_dict': {
2891 'title': 'Data Analysis with Dr Mike Pound',
2892 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2893 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2894 'uploader': 'Computerphile',
2895 },
2896 'playlist_mincount': 11,
2897 }, {
a0566bbf 2898 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2899 'only_matching': True,
dacb3a86
S
2900 }, {
2901 # Playlist URL that does not actually serve a playlist
2902 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2903 'info_dict': {
2904 'id': 'FqZTN594JQw',
2905 'ext': 'webm',
2906 'title': "Smiley's People 01 detective, Adventure Series, Action",
2907 'uploader': 'STREEM',
2908 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2910 'upload_date': '20150526',
2911 'license': 'Standard YouTube License',
2912 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2913 'categories': ['People & Blogs'],
2914 'tags': list,
dbdaaa23 2915 'view_count': int,
dacb3a86
S
2916 'like_count': int,
2917 'dislike_count': int,
2918 },
2919 'params': {
2920 'skip_download': True,
2921 },
13a75688 2922 'skip': 'This video is not available.',
dacb3a86 2923 'add_ie': [YoutubeIE.ie_key()],
481cc733 2924 }, {
8bdd16b4 2925 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2926 'only_matching': True,
66b48727 2927 }, {
8bdd16b4 2928 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2929 'only_matching': True,
a0566bbf 2930 }, {
2931 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2932 'info_dict': {
2933 'id': '9Auq9mYxFEE',
2934 'ext': 'mp4',
2935 'title': 'Watch Sky News live',
2936 'uploader': 'Sky News',
2937 'uploader_id': 'skynews',
2938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2939 'upload_date': '20191102',
2940 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2941 'categories': ['News & Politics'],
2942 'tags': list,
2943 'like_count': int,
2944 'dislike_count': int,
2945 },
2946 'params': {
2947 'skip_download': True,
2948 },
2949 }, {
2950 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2951 'info_dict': {
2952 'id': 'a48o2S1cPoo',
2953 'ext': 'mp4',
2954 'title': 'The Young Turks - Live Main Show',
2955 'uploader': 'The Young Turks',
2956 'uploader_id': 'TheYoungTurks',
2957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2958 'upload_date': '20150715',
2959 'license': 'Standard YouTube License',
2960 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2961 'categories': ['News & Politics'],
2962 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2963 'like_count': int,
2964 'dislike_count': int,
2965 },
2966 'params': {
2967 'skip_download': True,
2968 },
2969 'only_matching': True,
2970 }, {
2971 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2972 'only_matching': True,
2973 }, {
2974 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2975 'only_matching': True,
3d3dddc9 2976 }, {
2977 'url': 'https://www.youtube.com/feed/trending',
2978 'only_matching': True,
2979 }, {
2980 # needs auth
2981 'url': 'https://www.youtube.com/feed/library',
2982 'only_matching': True,
2983 }, {
2984 # needs auth
2985 'url': 'https://www.youtube.com/feed/history',
2986 'only_matching': True,
2987 }, {
2988 # needs auth
2989 'url': 'https://www.youtube.com/feed/subscriptions',
2990 'only_matching': True,
2991 }, {
2992 # needs auth
2993 'url': 'https://www.youtube.com/feed/watch_later',
2994 'only_matching': True,
2995 }, {
2996 # no longer available?
2997 'url': 'https://www.youtube.com/feed/recommended',
2998 'only_matching': True,
29f7c58a 2999 }, {
3000 # inline playlist with not always working continuations
3001 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3002 'only_matching': True,
3003 }, {
3004 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3005 'only_matching': True,
3006 }, {
3007 'url': 'https://www.youtube.com/course',
3008 'only_matching': True,
3009 }, {
3010 'url': 'https://www.youtube.com/zsecurity',
3011 'only_matching': True,
3012 }, {
3013 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3014 'only_matching': True,
3015 }, {
3016 'url': 'https://www.youtube.com/TheYoungTurks/live',
3017 'only_matching': True,
3018 }]
3019
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE.suitable`` accepts is declined here; every other
    URL is judged by the parent class implementation of ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3024
3025 def _extract_channel_id(self, webpage):
3026 channel_id = self._html_search_meta(
3027 'channelId', webpage, 'channel id', default=None)
3028 if channel_id:
3029 return channel_id
3030 channel_url = self._html_search_meta(
3031 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3032 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3033 'twitter:app:url:googleplay'), webpage, 'channel url')
3034 return self._search_regex(
3035 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3036 channel_url, 'channel id')
15f6397c 3037
8bdd16b4 3038 @staticmethod
3039 def _extract_grid_item_renderer(item):
3040 for item_kind in ('Playlist', 'Video', 'Channel'):
3041 renderer = item.get('grid%sRenderer' % item_kind)
3042 if renderer:
3043 return renderer
3044
def _grid_entries(self, grid_renderer):
    """Yield entries for every usable item of ``grid_renderer['items']``.

    Items that are not dicts, or whose renderer (as picked by
    ``_extract_grid_item_renderer``) is not a dict, are ignored. For each
    remaining renderer the ID checks below run independently, in this order:

    * ``playlistId`` -> a URL result pointing at the playlist page,
    * ``videoId``    -> the result of ``_extract_video`` for the renderer,
    * ``channelId``  -> a URL result pointing at the channel page.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue

        # Title in its "runs" form; used for playlist results.
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'],
            compat_str)

        # playlist
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            playlist_url = (
                'https://www.youtube.com/playlist?list=%s' % playlist_id)
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=playlist_id, video_title=runs_title)

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        # channel
        channel_id = item_renderer.get('channelId')
        if channel_id:
            # Channel results take their title from the "simpleText" form.
            simple_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'],
                compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % channel_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(),
                video_title=simple_title)
3073
3d3dddc9 3074 def _shelf_entries_from_content(self, shelf_renderer):
3075 content = shelf_renderer.get('content')
3076 if not isinstance(content, dict):
8bdd16b4 3077 return
3d3dddc9 3078 renderer = content.get('gridRenderer')
3079 if renderer:
3080 # TODO: add support for nested playlists so each shelf is processed
3081 # as separate playlist
3082 # TODO: this includes only first N items
3083 for entry in self._grid_entries(renderer):
3084 yield entry
3085 renderer = content.get('horizontalListRenderer')
3086 if renderer:
3087 # TODO
3088 pass
8bdd16b4 3089
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf links to its own page, a URL result for that page is
    yielded first. With *skip_channels* set, shelves linking to a
    channels listing produce nothing at all. The entries embedded in the
    shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3107
8bdd16b4 3108 def _playlist_entries(self, video_list_renderer):
3109 for content in video_list_renderer['contents']:
3110 if not isinstance(content, dict):
3111 continue
3112 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3113 if not isinstance(renderer, dict):
3114 continue
3115 video_id = renderer.get('videoId')
3116 if not video_id:
3117 continue
3118 yield self._extract_video(renderer)
07aeced6 3119
3d3dddc9 3120 r""" # Not needed in the new implementation
3462ffa8 3121 def _itemSection_entries(self, item_sect_renderer):
3122 for content in item_sect_renderer['contents']:
3123 if not isinstance(content, dict):
3124 continue
3125 renderer = content.get('videoRenderer', {})
3126 if not isinstance(renderer, dict):
3127 continue
3128 video_id = renderer.get('videoId')
3129 if not video_id:
3130 continue
3131 yield self._extract_video(renderer)
3d3dddc9 3132 """
3462ffa8 3133
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped in a rich item renderer, if it has an id."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3141
8bdd16b4 3142 def _video_entry(self, video_renderer):
3143 video_id = video_renderer.get('videoId')
3144 if video_id:
3145 return self._extract_video(video_renderer)
dacb3a86 3146
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) is yielded first, followed by every
    YouTube video linked inline in the post text. An inline link that
    points at the attached video is skipped, so it is not reported twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment id: it is what the duplicate check below
    # compares against (previously it was always None).
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not of the attachment.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3175
8bdd16b4 3176 def _post_thread_continuation_entries(self, post_thread_continuation):
3177 contents = post_thread_continuation.get('contents')
3178 if not isinstance(contents, list):
3179 return
3180 for content in contents:
3181 renderer = content.get('backstagePostThreadRenderer')
3182 if not isinstance(renderer, dict):
3183 continue
3184 for entry in self._post_thread_entries(renderer):
3185 yield entry
07aeced6 3186
29f7c58a 3187 @staticmethod
3188 def _build_continuation_query(continuation, ctp=None):
3189 query = {
3190 'ctoken': continuation,
3191 'continuation': continuation,
3192 }
3193 if ctp:
3194 query['itct'] = ctp
3195 return query
3196
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query from ``nextContinuationData``.

    None is returned when the renderer has no such data or the data
    carries no continuation token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    click_params = next_data.get('clickTrackingParams')
    return YoutubeTabIE._build_continuation_query(token, click_params)
c5e8d7af 3208
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first. Otherwise the first
    ``continuationItemRenderer`` among the renderer's ``contents`` that
    carries a continuation token is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    items = renderer.get('contents')
    if not isinstance(items, list):
        return None
    for item in items:
        if not isinstance(item, dict):
            continue
        endpoint = try_get(
            item, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            click_params = endpoint.get('clickTrackingParams')
            return YoutubeTabIE._build_continuation_query(token, click_params)
    return None
448830ce 3231
def _entries(self, tab, identity_token):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab are yielded first. While a
    continuation query is available, further pages are fetched from
    ``browse_ajax``; HTTP 500/503 responses are retried up to 3 times.
    *identity_token*, when given, is sent as the
    ``x-youtube-identity-token`` header.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Yields the entries below parent_renderer and stores the
        # continuation query it finds in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    is_channels_tab = tab.get('title') == 'Channels'
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict)
        if continuation_contents:
            continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
            if continuation_renderer:
                for entry in self._playlist_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('gridContinuation')
            if continuation_renderer:
                for entry in self._grid_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('itemSectionContinuation')
            if continuation_renderer:
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
            if continuation_renderer:
                # Fresh slot for extract_entries to store the next query in.
                continuation_list = [None]
                for entry in extract_entries(continuation_renderer):
                    yield entry
                continuation = continuation_list[0]
                continue

        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        if continuation_items:
            continuation_item = continuation_items[0]
            if not isinstance(continuation_item, dict):
                # Stop here: `continuation` has not changed, so going on
                # would request the very same page over and over again.
                break
            renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
            if renderer:
                video_list_renderer = {'contents': continuation_items}
                for entry in self._playlist_entries(video_list_renderer):
                    yield entry
                continuation = self._extract_continuation(video_list_renderer)
                continue
        break
9558dcec 3375
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the selected tab.

    Raises ExtractorError when no tab is flagged as selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3383
@staticmethod
def _extract_uploader(data):
    """Return uploader name, id and URL taken from the playlist sidebar.

    The result is an empty dict when the sidebar carries no video owner.
    If several owners are present, the last one wins.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return info
3406
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a tab-based page.

    Title, description and id are taken from the channel metadata first
    and then overridden by the microformat (or playlist metadata)
    renderer when one is present. The entries come from the selected tab
    and the uploader fields from the sidebar.
    """
    selected_tab = self._extract_selected_tab(tabs)
    playlist_id = title = description = None

    channel_meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_meta:
        title = channel_meta.get('title') or item_id
        tab_title = selected_tab.get('title')
        if tab_title:
            title += ' - %s' % tab_title
        description = channel_meta.get('description')
        playlist_id = channel_meta.get('externalId')

    # this has thumbnails, but there is currently no thumbnail field for playlists
    # sidebar.playlistSidebarRenderer has even more data, but its structure is more complex
    page_meta = (
        try_get(data, lambda x: x['microformat']['microformatDataRenderer'], dict)
        or try_get(data, lambda x: x['metadata']['playlistMetadataRenderer'], dict))
    if page_meta:
        title = page_meta.get('title')
        description = page_meta.get('description')
        playlist_id = item_id

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = "Youtube " + playlist_id.title()

    result = self.playlist_result(
        self._entries(selected_tab, identity_token),
        playlist_id=playlist_id, playlist_title=title,
        playlist_description=description)
    result.update(self._extract_uploader(data))
    return result
73c4ac2c 3443
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for an inline (watch page) playlist.

    When the playlist exposes its own URL and that URL differs from
    *url*, extraction is delegated to it; otherwise the inline entries
    are returned directly.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3461
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs for the alerts in *data*.

    One pair is yielded for the ``simpleText`` of an alert and one for
    every text run. Alerts without a type, and alert entries that are
    not dicts, are ignored.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard against malformed renderers: `.get` below would
            # raise on anything that is not a dict.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
3479
def _extract_identity_token(self, webpage, item_id):
    """Return the identity token of the page, or None.

    The ``ID_TOKEN`` entry of ytcfg is preferred; a regex search over the
    raw webpage is the fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = None
    if ytcfg:
        token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
3489
def _real_extract(self, url):
    """Extract a tab, playlist or (as a fallback) single video page.

    Channel/user home pages are redirected to their ``/videos`` tab and
    ``watch`` URLs without a video id are turned into playlist URLs.
    Alerts reported by the page are shown as warnings; an alert of type
    error aborts the extraction.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    not_channel = is_home.group('not_channel') if is_home is not None else None
    if is_home is not None and not_channel is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not_channel is not None and not_channel.startswith('watch') and not video_id:
        if not playlist_id:
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning(
            '%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video %s'
            % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)

    # Only the last error alert is raised; earlier ones become warnings.
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() != 'error':
            self._downloader.report_warning(
                'YouTube said: %s - %s' % (alert_type, alert_message))
            continue
        if err_msg:
            self._downloader.report_warning(
                'YouTube said: %s - %s' % ('ERROR', err_msg))
        err_msg = alert_message
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3549
c5e8d7af 3550
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and any youtube/invidio.us URL carrying a
    # `list=` parameter, then hands the playlist over to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for every URL it can handle itself."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical /playlist URL, keeping the query.

        Inputs without a query string (bare ids) get ``list=<id>``.
        """
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3625
3626
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL for YoutubeTabIE."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3665
3666
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for a user's page.
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the matching /user/ URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3680
b05654f0 3681
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" shortcut; resolves to the "LL" (liked videos) playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3699
3700
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* videos matching *query*.

        Result pages are requested one after another from the search API.
        Iteration stops when *n* videos were yielded, a page cannot be
        downloaded or has no contents, or no continuation token is found.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Look for the token before anything else: the item that
                # carries it has no itemSectionRenderer, so checking after
                # the `continue` below would never see it.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3781
c9ae7b95 3782
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, but with the `params` value
    # below added to every request.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3788
c9ae7b95 3789
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results?search_query=... (or q=...) URLs.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the query string of *url*.

        The ``sp`` parameter, when present, is stored in
        ``_SEARCH_PARAMS`` on the instance.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3815
3816
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Every subclass has to provide a _FEED_NAME attribute.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<name> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3837
3838
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shortcut; resolves to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3851
3852
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page and the ":ytrec" shortcut.
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3867
1ed5b5c9 3868
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" shortcut for the subscriptions feed.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3880
1ed5b5c9 3881
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``history`` feed (``:ythistory``)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
3890
3891
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that carry no video id and explain the likely cause.

    The pattern accepts ``watch?`` URLs whose query is empty or holds a single
    ``feature``, ``annotation_id``, ``x-yt-cl``, ``hl`` or ``t`` parameter, and
    ``attribution_link`` URLs with only an ``a`` parameter. Extraction never
    succeeds: it always raises an expected ExtractorError hinting that the
    URL was probably cut at an unquoted ``&``.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; report the problem as an
        # expected (user-facing) error instead.
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3939
3940
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose ``v`` parameter is only 1 to 10 characters long.

    Such URLs are reported as truncated through an expected ExtractorError.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Echo both the partial id and the full URL so the user can see
        # what was actually received.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)
8bdd16b4 3956
3957
# Do YouTube show URLs even exist anymore? I couldn't find any.
# The extractor below is therefore disabled: it is wrapped in a raw string
# literal, so the class is never defined when this module is imported.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''