]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Add PyPI release
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 float_or_none,
35 get_element_by_id,
36 int_or_none,
37 mimetype2ext,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55)
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of the Google account sign-in flow used by _login() below.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of top-level youtube.com path segments that are
    # site features rather than channel/user names.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches prefixed playlist IDs as well as the special lists
    # RDMM (my mix), WL (watch later), LL (liked) and LM (liked music).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie so YouTube serves pages in English."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the documented contract: False signals a failed login.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. f_req is the positional
            # "f.req" JSON payload Google's endpoints expect; the response is
            # JSON after an anti-XSSI prefix that transform_source strips
            # (everything before the first '[').
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account to obtain the opaque user hash.
        # The payload layout mirrors what the web sign-in page sends;
        # most slots are unused placeholders.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up account.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Keep the documented contract: False signals a failed login.
            return False

        # A non-empty entry at [0][5] indicates a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, the server demands an additional challenge before
        # completing the login (TFA, device verification, ...).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # TL token required to address the TFA submission endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users sometimes paste codes with the SMS "G-" prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved programmatically; tell the
                # user to resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Final step: fetching this URL sets the session cookies; a redirect
        # to myaccount.google.com confirms the login took effect.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap the base downloader, passing a private copy of the query dict."""
        # Copy so later mutation of kwargs['query'] cannot affect the caller.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob from a watch page.

        Returns the parsed dict, or None if the blob is absent or unparseable.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt login before extraction."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal context sent with every youtubei/v1 (InnerTube) API call.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Delimits the end of the initial-data assignment in page HTML.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST to the youtubei/v1 API endpoint `ep` and return the JSON response.

        `query` is merged over _DEFAULT_API_DATA to form the request body.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # Public web client API key, not an account credential.
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Parse ytInitialData from `webpage`, raising if it cannot be found."""
        return self._parse_json(
            self._search_regex(
                # Prefer the boundary-anchored match; fall back to the bare
                # pattern if no boundary follows.
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the ytcfg.set(...) config object from `webpage`, or {} / None."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
334
335
336class YoutubeIE(YoutubeBaseInfoExtractor):
337 IE_DESC = 'YouTube.com'
338 _VALID_URL = r"""(?x)^
339 (
340 (?:https?://|//) # http(s):// or protocol-independent URL
341 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
342 (?:www\.)?deturl\.com/www\.youtube\.com/|
343 (?:www\.)?pwnyoutube\.com/|
344 (?:www\.)?hooktube\.com/|
345 (?:www\.)?yourepeat\.com/|
346 tube\.majestyc\.net/|
347 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
348 (?:(?:www|dev)\.)?invidio\.us/|
349 (?:(?:www|no)\.)?invidiou\.sh/|
350 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
351 (?:www\.)?invidious\.kabi\.tk/|
352 (?:www\.)?invidious\.13ad\.de/|
353 (?:www\.)?invidious\.mastodon\.host/|
354 (?:www\.)?invidious\.zapashcanon\.fr/|
355 (?:www\.)?invidious\.kavin\.rocks/|
356 (?:www\.)?invidious\.tube/|
357 (?:www\.)?invidiou\.site/|
358 (?:www\.)?invidious\.site/|
359 (?:www\.)?invidious\.xyz/|
360 (?:www\.)?invidious\.nixnet\.xyz/|
361 (?:www\.)?invidious\.drycat\.fr/|
362 (?:www\.)?tube\.poal\.co/|
363 (?:www\.)?tube\.connect\.cafe/|
364 (?:www\.)?vid\.wxzm\.sx/|
365 (?:www\.)?vid\.mint\.lgbt/|
366 (?:www\.)?yewtu\.be/|
367 (?:www\.)?yt\.elukerio\.org/|
368 (?:www\.)?yt\.lelux\.fi/|
369 (?:www\.)?invidious\.ggc-project\.de/|
370 (?:www\.)?yt\.maisputain\.ovh/|
371 (?:www\.)?invidious\.13ad\.de/|
372 (?:www\.)?invidious\.toot\.koeln/|
373 (?:www\.)?invidious\.fdn\.fr/|
374 (?:www\.)?watch\.nettohikari\.com/|
375 (?:www\.)?kgg2m7yk5aybusll\.onion/|
376 (?:www\.)?qklhadlycap4cnod\.onion/|
377 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
378 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
379 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
380 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
381 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
382 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
383 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
384 (?:.*?\#/)? # handle anchor (#/) redirect urls
385 (?: # the various things that can precede the ID:
386 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
387 |(?: # or the v= param in all its forms
388 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
389 (?:\?|\#!?) # the params delimiter ? or # or #!
390 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
391 v=
392 )
393 ))
394 |(?:
395 youtu\.be| # just youtu.be/xxxx
396 vid\.plus| # or vid.plus/xxxx
397 zwearz\.com/watch| # or zwearz.com/watch/xxxx
398 )/
399 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
400 )
401 )? # all until now is optional -> you can pass the naked ID
402 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
403 (?!.*?\blist=
404 (?:
405 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
406 WL # WL are handled by the watch later IE
407 )
408 )
409 (?(1).+)? # if we found the ID, everything can follow
410 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
411 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
412 _PLAYER_INFO_RE = (
413 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
414 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
415 )
416 _formats = {
417 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
418 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
419 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
420 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
421 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
422 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
423 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
424 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
425 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
426 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
427 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
429 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
430 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
431 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
432 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
433 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
434 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
435
436
437 # 3D videos
438 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
439 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
440 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
441 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
442 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
443 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
444 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
445
446 # Apple HTTP Live Streaming
447 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
448 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
449 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
450 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
451 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
452 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
453 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
455
456 # DASH mp4 video
457 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
458 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
459 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
460 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
461 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
462 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
463 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
467 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
468 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
469
470 # Dash mp4 audio
471 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
472 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
473 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
474 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
475 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
476 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
477 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
478
479 # Dash webm
480 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
481 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
482 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
483 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
484 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
485 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
487 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
488 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
490 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
491 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
492 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
496 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
498 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
499 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
500 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
502
503 # Dash webm audio
504 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
505 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
506
507 # Dash webm audio with opus inside
508 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
509 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
510 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
511
512 # RTMP (unnamed)
513 '_rtmp': {'protocol': 'rtmp'},
514
515 # av01 video only formats sometimes served with "unknown" codecs
516 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
517 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
518 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
519 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
520 }
521 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
522
523 _GEO_BYPASS = False
524
525 IE_NAME = 'youtube'
526 _TESTS = [
527 {
528 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
529 'info_dict': {
530 'id': 'BaW_jenozKc',
531 'ext': 'mp4',
532 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
533 'uploader': 'Philipp Hagemeister',
534 'uploader_id': 'phihag',
535 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
536 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
537 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
538 'upload_date': '20121002',
539 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
540 'categories': ['Science & Technology'],
541 'tags': ['youtube-dl'],
542 'duration': 10,
543 'view_count': int,
544 'like_count': int,
545 'dislike_count': int,
546 'start_time': 1,
547 'end_time': 9,
548 }
549 },
550 {
551 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
552 'note': 'Embed-only video (#1746)',
553 'info_dict': {
554 'id': 'yZIXLfi8CZQ',
555 'ext': 'mp4',
556 'upload_date': '20120608',
557 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
558 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
559 'uploader': 'SET India',
560 'uploader_id': 'setindia',
561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
562 'age_limit': 18,
563 }
564 },
565 {
566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
567 'note': 'Use the first video ID in the URL',
568 'info_dict': {
569 'id': 'BaW_jenozKc',
570 'ext': 'mp4',
571 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
575 'upload_date': '20121002',
576 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
577 'categories': ['Science & Technology'],
578 'tags': ['youtube-dl'],
579 'duration': 10,
580 'view_count': int,
581 'like_count': int,
582 'dislike_count': int,
583 },
584 'params': {
585 'skip_download': True,
586 },
587 },
588 {
589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
590 'note': '256k DASH audio (format 141) via DASH manifest',
591 'info_dict': {
592 'id': 'a9LDPn-MO4I',
593 'ext': 'm4a',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
597 'description': '',
598 'uploader': '8KVIDEO',
599 'title': 'UHDTV TEST 8K VIDEO.mp4'
600 },
601 'params': {
602 'youtube_include_dash_manifest': True,
603 'format': '141',
604 },
605 'skip': 'format 141 not served anymore',
606 },
607 # DASH manifest with encrypted signature
608 {
609 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
610 'info_dict': {
611 'id': 'IB3lcPjvWLA',
612 'ext': 'm4a',
613 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
614 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
615 'duration': 244,
616 'uploader': 'AfrojackVEVO',
617 'uploader_id': 'AfrojackVEVO',
618 'upload_date': '20131011',
619 },
620 'params': {
621 'youtube_include_dash_manifest': True,
622 'format': '141/bestaudio[ext=m4a]',
623 },
624 },
625 # Controversy video
626 {
627 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
628 'info_dict': {
629 'id': 'T4XJQO3qol8',
630 'ext': 'mp4',
631 'duration': 219,
632 'upload_date': '20100909',
633 'uploader': 'Amazing Atheist',
634 'uploader_id': 'TheAmazingAtheist',
635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
636 'title': 'Burning Everyone\'s Koran',
637 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
638 }
639 },
640 # Normal age-gate video (embed allowed)
641 {
642 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
643 'info_dict': {
644 'id': 'HtVdAasjOgU',
645 'ext': 'mp4',
646 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
647 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
648 'duration': 142,
649 'uploader': 'The Witcher',
650 'uploader_id': 'WitcherGame',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
652 'upload_date': '20140605',
653 'age_limit': 18,
654 },
655 },
656 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
657 # YouTube Red ad is not captured for creator
658 {
659 'url': '__2ABJjxzNo',
660 'info_dict': {
661 'id': '__2ABJjxzNo',
662 'ext': 'mp4',
663 'duration': 266,
664 'upload_date': '20100430',
665 'uploader_id': 'deadmau5',
666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
667 'creator': 'Dada Life, deadmau5',
668 'description': 'md5:12c56784b8032162bb936a5f76d55360',
669 'uploader': 'deadmau5',
670 'title': 'Deadmau5 - Some Chords (HD)',
671 'alt_title': 'This Machine Kills Some Chords',
672 },
673 'expected_warnings': [
674 'DASH manifest missing',
675 ]
676 },
677 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
678 {
679 'url': 'lqQg6PlCWgI',
680 'info_dict': {
681 'id': 'lqQg6PlCWgI',
682 'ext': 'mp4',
683 'duration': 6085,
684 'upload_date': '20150827',
685 'uploader_id': 'olympic',
686 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
687 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
688 'uploader': 'Olympic',
689 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
690 },
691 'params': {
692 'skip_download': 'requires avconv',
693 }
694 },
695 # Non-square pixels
696 {
697 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
698 'info_dict': {
699 'id': '_b-2C3KPAM0',
700 'ext': 'mp4',
701 'stretched_ratio': 16 / 9.,
702 'duration': 85,
703 'upload_date': '20110310',
704 'uploader_id': 'AllenMeow',
705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
706 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
707 'uploader': '孫ᄋᄅ',
708 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
709 },
710 },
711 # url_encoded_fmt_stream_map is empty string
712 {
713 'url': 'qEJwOuvDf7I',
714 'info_dict': {
715 'id': 'qEJwOuvDf7I',
716 'ext': 'webm',
717 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
718 'description': '',
719 'upload_date': '20150404',
720 'uploader_id': 'spbelect',
721 'uploader': 'Наблюдатели Петербурга',
722 },
723 'params': {
724 'skip_download': 'requires avconv',
725 },
726 'skip': 'This live event has ended.',
727 },
728 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
729 {
730 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
731 'info_dict': {
732 'id': 'FIl7x6_3R5Y',
733 'ext': 'webm',
734 'title': 'md5:7b81415841e02ecd4313668cde88737a',
735 'description': 'md5:116377fd2963b81ec4ce64b542173306',
736 'duration': 220,
737 'upload_date': '20150625',
738 'uploader_id': 'dorappi2000',
739 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
740 'uploader': 'dorappi2000',
741 'formats': 'mincount:31',
742 },
743 'skip': 'not actual anymore',
744 },
745 # DASH manifest with segment_list
746 {
747 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
748 'md5': '8ce563a1d667b599d21064e982ab9e31',
749 'info_dict': {
750 'id': 'CsmdDsKjzN8',
751 'ext': 'mp4',
752 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
753 'uploader': 'Airtek',
754 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
755 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
756 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
757 },
758 'params': {
759 'youtube_include_dash_manifest': True,
760 'format': '135', # bestvideo
761 },
762 'skip': 'This live event has ended.',
763 },
764 {
765 # Multifeed videos (multiple cameras), URL is for Main Camera
766 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
767 'info_dict': {
768 'id': 'jqWvoWXjCVs',
769 'title': 'teamPGP: Rocket League Noob Stream',
770 'description': 'md5:dc7872fb300e143831327f1bae3af010',
771 },
772 'playlist': [{
773 'info_dict': {
774 'id': 'jqWvoWXjCVs',
775 'ext': 'mp4',
776 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
777 'description': 'md5:dc7872fb300e143831327f1bae3af010',
778 'duration': 7335,
779 'upload_date': '20150721',
780 'uploader': 'Beer Games Beer',
781 'uploader_id': 'beergamesbeer',
782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
783 'license': 'Standard YouTube License',
784 },
785 }, {
786 'info_dict': {
787 'id': '6h8e8xoXJzg',
788 'ext': 'mp4',
789 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
790 'description': 'md5:dc7872fb300e143831327f1bae3af010',
791 'duration': 7337,
792 'upload_date': '20150721',
793 'uploader': 'Beer Games Beer',
794 'uploader_id': 'beergamesbeer',
795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
796 'license': 'Standard YouTube License',
797 },
798 }, {
799 'info_dict': {
800 'id': 'PUOgX5z9xZw',
801 'ext': 'mp4',
802 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
803 'description': 'md5:dc7872fb300e143831327f1bae3af010',
804 'duration': 7337,
805 'upload_date': '20150721',
806 'uploader': 'Beer Games Beer',
807 'uploader_id': 'beergamesbeer',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
809 'license': 'Standard YouTube License',
810 },
811 }, {
812 'info_dict': {
813 'id': 'teuwxikvS5k',
814 'ext': 'mp4',
815 'title': 'teamPGP: Rocket League Noob Stream (zim)',
816 'description': 'md5:dc7872fb300e143831327f1bae3af010',
817 'duration': 7334,
818 'upload_date': '20150721',
819 'uploader': 'Beer Games Beer',
820 'uploader_id': 'beergamesbeer',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
822 'license': 'Standard YouTube License',
823 },
824 }],
825 'params': {
826 'skip_download': True,
827 },
828 'skip': 'This video is not available.',
829 },
830 {
831 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
832 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
833 'info_dict': {
834 'id': 'gVfLd0zydlo',
835 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
836 },
837 'playlist_count': 2,
838 'skip': 'Not multifeed anymore',
839 },
840 {
841 'url': 'https://vid.plus/FlRa-iH7PGw',
842 'only_matching': True,
843 },
844 {
845 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
846 'only_matching': True,
847 },
848 {
849 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
850 # Also tests cut-off URL expansion in video description (see
851 # https://github.com/ytdl-org/youtube-dl/issues/1892,
852 # https://github.com/ytdl-org/youtube-dl/issues/8164)
853 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
854 'info_dict': {
855 'id': 'lsguqyKfVQg',
856 'ext': 'mp4',
857 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
858 'alt_title': 'Dark Walk - Position Music',
859 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
860 'duration': 133,
861 'upload_date': '20151119',
862 'uploader_id': 'IronSoulElf',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
864 'uploader': 'IronSoulElf',
865 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
866 'track': 'Dark Walk - Position Music',
867 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
868 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
869 },
870 'params': {
871 'skip_download': True,
872 },
873 },
874 {
875 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
876 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
877 'only_matching': True,
878 },
879 {
880 # Video with yt:stretch=17:0
881 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
882 'info_dict': {
883 'id': 'Q39EVAstoRM',
884 'ext': 'mp4',
885 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
886 'description': 'md5:ee18a25c350637c8faff806845bddee9',
887 'upload_date': '20151107',
888 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
889 'uploader': 'CH GAMER DROID',
890 },
891 'params': {
892 'skip_download': True,
893 },
894 'skip': 'This video does not exist.',
895 },
896 {
897 # Video licensed under Creative Commons
898 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
899 'info_dict': {
900 'id': 'M4gD1WSo5mA',
901 'ext': 'mp4',
902 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
903 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
904 'duration': 721,
905 'upload_date': '20150127',
906 'uploader_id': 'BerkmanCenter',
907 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
908 'uploader': 'The Berkman Klein Center for Internet & Society',
909 'license': 'Creative Commons Attribution license (reuse allowed)',
910 },
911 'params': {
912 'skip_download': True,
913 },
914 },
915 {
916 # Channel-like uploader_url
917 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
918 'info_dict': {
919 'id': 'eQcmzGIKrzg',
920 'ext': 'mp4',
921 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
922 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
923 'duration': 4060,
924 'upload_date': '20151119',
925 'uploader': 'Bernie Sanders',
926 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
928 'license': 'Creative Commons Attribution license (reuse allowed)',
929 },
930 'params': {
931 'skip_download': True,
932 },
933 },
934 {
935 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
936 'only_matching': True,
937 },
938 {
939 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
940 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
941 'only_matching': True,
942 },
943 {
944 # Rental video preview
945 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
946 'info_dict': {
947 'id': 'uGpuVWrhIzE',
948 'ext': 'mp4',
949 'title': 'Piku - Trailer',
950 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
951 'upload_date': '20150811',
952 'uploader': 'FlixMatrix',
953 'uploader_id': 'FlixMatrixKaravan',
954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
955 'license': 'Standard YouTube License',
956 },
957 'params': {
958 'skip_download': True,
959 },
960 'skip': 'This video is not available.',
961 },
962 {
963 # YouTube Red video with episode data
964 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
965 'info_dict': {
966 'id': 'iqKdEhx-dD4',
967 'ext': 'mp4',
968 'title': 'Isolation - Mind Field (Ep 1)',
969 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
970 'duration': 2085,
971 'upload_date': '20170118',
972 'uploader': 'Vsauce',
973 'uploader_id': 'Vsauce',
974 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
975 'series': 'Mind Field',
976 'season_number': 1,
977 'episode_number': 1,
978 },
979 'params': {
980 'skip_download': True,
981 },
982 'expected_warnings': [
983 'Skipping DASH manifest',
984 ],
985 },
986 {
987 # The following content has been identified by the YouTube community
988 # as inappropriate or offensive to some audiences.
989 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
990 'info_dict': {
991 'id': '6SJNVb0GnPI',
992 'ext': 'mp4',
993 'title': 'Race Differences in Intelligence',
994 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
995 'duration': 965,
996 'upload_date': '20140124',
997 'uploader': 'New Century Foundation',
998 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 },
1005 {
1006 # itag 212
1007 'url': '1t24XAntNCY',
1008 'only_matching': True,
1009 },
1010 {
1011 # geo restricted to JP
1012 'url': 'sJL6WA-aGkQ',
1013 'only_matching': True,
1014 },
1015 {
1016 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1017 'only_matching': True,
1018 },
1019 {
1020 # DRM protected
1021 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1022 'only_matching': True,
1023 },
1024 {
1025 # Video with unsupported adaptive stream type formats
1026 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1027 'info_dict': {
1028 'id': 'Z4Vy8R84T1U',
1029 'ext': 'mp4',
1030 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1031 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1032 'duration': 433,
1033 'upload_date': '20130923',
1034 'uploader': 'Amelia Putri Harwita',
1035 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1037 'formats': 'maxcount:10',
1038 },
1039 'params': {
1040 'skip_download': True,
1041 'youtube_include_dash_manifest': False,
1042 },
1043 'skip': 'not actual anymore',
1044 },
1045 {
1046 # Youtube Music Auto-generated description
1047 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1048 'info_dict': {
1049 'id': 'MgNrAu2pzNs',
1050 'ext': 'mp4',
1051 'title': 'Voyeur Girl',
1052 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1053 'upload_date': '20190312',
1054 'uploader': 'Stephen - Topic',
1055 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1056 'artist': 'Stephen',
1057 'track': 'Voyeur Girl',
1058 'album': 'it\'s too much love to know my dear',
1059 'release_date': '20190313',
1060 'release_year': 2019,
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
1066 {
1067 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1068 'only_matching': True,
1069 },
1070 {
1071 # invalid -> valid video id redirection
1072 'url': 'DJztXj2GPfl',
1073 'info_dict': {
1074 'id': 'DJztXj2GPfk',
1075 'ext': 'mp4',
1076 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1077 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1078 'upload_date': '20090125',
1079 'uploader': 'Prochorowka',
1080 'uploader_id': 'Prochorowka',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1082 'artist': 'Panjabi MC',
1083 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1084 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
1089 },
1090 {
1091 # empty description results in an empty string
1092 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1093 'info_dict': {
1094 'id': 'x41yOUIvK2k',
1095 'ext': 'mp4',
1096 'title': 'IMG 3456',
1097 'description': '',
1098 'upload_date': '20170613',
1099 'uploader_id': 'ElevageOrVert',
1100 'uploader': 'ElevageOrVert',
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 },
1106 {
1107 # with '};' inside yt initial data (see [1])
1108 # see [2] for an example with '};' inside ytInitialPlayerResponse
1109 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1110 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1111 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1112 'info_dict': {
1113 'id': 'CHqg6qOn4no',
1114 'ext': 'mp4',
1115 'title': 'Part 77 Sort a list of simple types in c#',
1116 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1117 'upload_date': '20130831',
1118 'uploader_id': 'kudvenkat',
1119 'uploader': 'kudvenkat',
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 },
1125 {
1126 # another example of '};' in ytInitialData
1127 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1128 'only_matching': True,
1129 },
1130 {
1131 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1132 'only_matching': True,
1133 },
1134 ]
1135
    def __init__(self, *args, **kwargs):
        # Forward all arguments to the base InfoExtractor and set up a
        # per-instance cache mapping (player_url, signature spec) to the
        # extracted decryption function (see _decrypt_signature).
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}
1139
1140 def report_video_info_webpage_download(self, video_id):
1141 """Report attempt to download video info webpage."""
1142 self.to_screen('%s: Downloading video info webpage' % video_id)
1143
1144 def report_information_extraction(self, video_id):
1145 """Report attempt to extract video information."""
1146 self.to_screen('%s: Extracting video information' % video_id)
1147
1148 def report_unavailable_format(self, video_id, format):
1149 """Report extracted video URL."""
1150 self.to_screen('%s: Format %s not available' % (video_id, format))
1151
1152 def report_rtmp_download(self):
1153 """Indicate the download will use the RTMP protocol."""
1154 self.to_screen('RTMP download detected')
1155
1156 def _signature_cache_id(self, example_sig):
1157 """ Return a string representation of a signature """
1158 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1159
1160 @classmethod
1161 def _extract_player_info(cls, player_url):
1162 for player_re in cls._PLAYER_INFO_RE:
1163 id_m = re.search(player_re, player_url)
1164 if id_m:
1165 break
1166 else:
1167 raise ExtractorError('Cannot identify player %r' % player_url)
1168 return id_m.group('ext'), id_m.group('id')
1169
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build the signature-decryption function for the given player.

        The result is cached on disk as a permutation spec (a list of
        source indices) keyed by player type/id and the signature's length
        pattern, so later runs skip downloading and parsing the player.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache file name, so it must not contain
        # path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Replay the cached permutation without re-parsing the player
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a string of distinct characters
        # to record which input index each output position comes from.
        # NOTE(review): this assumes the function is a pure permutation of
        # its input characters — confirm before relying on the cached spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1209
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Used with the youtube_print_sig_code option so the recovered
        permutation can be pasted into static signature-handling code.
        """
        def gen_sig_code(idxs):
            # Compress a list of source indices into slice expressions
            # wherever consecutive indices form a run with stride +/-1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or emit it and reset
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run with stride +1 or -1
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe func with a distinct-character string to recover the
        # permutation it applies (same technique as _extract_signature_function)
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1248
    def _parse_sig_js(self, jscode):
        """Locate the signature function in player JS and return a wrapper.

        Tries a series of regexes (current player layouts first, obsolete
        ones last) to find the function name, then interprets the function
        with JSInterpreter.  Returns a callable mapping a scrambled
        signature string to the deciphered one.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1269
1270 def _parse_sig_swf(self, file_contents):
1271 swfi = SWFInterpreter(file_contents)
1272 TARGET_CLASSNAME = 'SignatureDecipher'
1273 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1274 initial_function = swfi.extract_function(searched_class, 'decipher')
1275 return lambda s: initial_function([s])
1276
1277 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1278 """Turn the encrypted s field into a working signature"""
1279
1280 if player_url is None:
1281 raise ExtractorError('Cannot decrypt signature without player_url')
1282
1283 if player_url.startswith('//'):
1284 player_url = 'https:' + player_url
1285 elif not re.match(r'https?://', player_url):
1286 player_url = compat_urlparse.urljoin(
1287 'https://www.youtube.com', player_url)
1288 try:
1289 player_id = (player_url, self._signature_cache_id(s))
1290 if player_id not in self._player_cache:
1291 func = self._extract_signature_function(
1292 video_id, player_url, s
1293 )
1294 self._player_cache[player_id] = func
1295 func = self._player_cache[player_id]
1296 if self._downloader.params.get('youtube_print_sig_code'):
1297 self._print_sig_code(func, s)
1298 return func(s)
1299 except Exception as e:
1300 tb = traceback.format_exc()
1301 raise ExtractorError(
1302 'Signature extraction failed: ' + tb, cause=e)
1303
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Return manual subtitle tracks as {lang: [format dicts]}.

        Queries the legacy timedtext track-list endpoint; when the video
        has a live chat replay, a synthetic 'live_chat' track is added.
        Returns {} (after a warning) when nothing can be listed.
        """
        # NOTE(review): webpage is unused here; kept for call-site compatibility
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track seen per language
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # Synthetic track fetched via the youtube_live_chat_replay protocol
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1343
1344 def _get_ytplayer_config(self, video_id, webpage):
1345 patterns = (
1346 # User data may contain arbitrary character sequences that may affect
1347 # JSON extraction with regex, e.g. when '};' is contained the second
1348 # regex won't capture the whole JSON. Yet working around by trying more
1349 # concrete regex first keeping in mind proper quoted string handling
1350 # to be implemented in future that will replace this workaround (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1352 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1353 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1354 r';ytplayer\.config\s*=\s*({.+?});',
1355 )
1356 config = self._search_regex(
1357 patterns, webpage, 'ytplayer.config', default=None)
1358 if config:
1359 return self._parse_json(
1360 uppercase_escape(config), video_id, fatal=False)
1361
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return automatic captions as {lang: [format dicts]}.

        Tries, in order: the legacy ttsurl from the ytplayer.config args,
        the captions data in player_response (the format used since
        22.06.2017), and finally the legacy caption_tracks args.  Returns
        {} (after a warning) when no source yields captions.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the
                # tlang/fmt query parameters of a base caption URL
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1458
1459 def _mark_watched(self, video_id, video_info, player_response):
1460 playback_url = url_or_none(try_get(
1461 player_response,
1462 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1463 video_info, lambda x: x['videostats_playback_base_url'][0]))
1464 if not playback_url:
1465 return
1466 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1467 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1468
1469 # cpn generation algorithm is reverse engineered from base.js.
1470 # In fact it works even with dummy cpn.
1471 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1472 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1473
1474 qs.update({
1475 'ver': ['2'],
1476 'cpn': [cpn],
1477 })
1478 playback_url = compat_urlparse.urlunparse(
1479 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1480
1481 self._download_webpage(
1482 playback_url, video_id, 'Marking watched',
1483 'Unable to mark watched', fatal=False)
1484
1485 @staticmethod
1486 def _extract_urls(webpage):
1487 # Embedded YouTube player
1488 entries = [
1489 unescapeHTML(mobj.group('url'))
1490 for mobj in re.finditer(r'''(?x)
1491 (?:
1492 <iframe[^>]+?src=|
1493 data-video-url=|
1494 <embed[^>]+?src=|
1495 embedSWF\(?:\s*|
1496 <object[^>]+data=|
1497 new\s+SWFObject\(
1498 )
1499 (["\'])
1500 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1501 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1502 \1''', webpage)]
1503
1504 # lazyYT YouTube embed
1505 entries.extend(list(map(
1506 unescapeHTML,
1507 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1508
1509 # Wordpress "YouTube Video Importer" plugin
1510 matches = re.findall(r'''(?x)<div[^>]+
1511 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1512 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1513 entries.extend(m[-1] for m in matches)
1514
1515 return entries
1516
1517 @staticmethod
1518 def _extract_url(webpage):
1519 urls = YoutubeIE._extract_urls(webpage)
1520 return urls[0] if urls else None
1521
1522 @classmethod
1523 def extract_id(cls, url):
1524 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1525 if mobj is None:
1526 raise ExtractorError('Invalid URL: %s' % url)
1527 video_id = mobj.group(2)
1528 return video_id
1529
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract the chapter list from ytInitialData's player bar, if any.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or
        None when the page has no parseable chapter data.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Chapter start is given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # End of this chapter is the start of the next one (or the
            # total video duration for the last chapter); next_num is
            # already this chapter's index + 1 because enumerate starts at 1
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1574
1575 @staticmethod
1576 def _extract_chapters_from_description(description, duration):
1577 if not description:
1578 return None
1579 chapter_lines = re.findall(
1580 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1581 description)
1582 if not chapter_lines:
1583 return None
1584 chapters = []
1585 for next_num, (chapter_line, time_point) in enumerate(
1586 chapter_lines, start=1):
1587 start_time = parse_duration(time_point)
1588 if start_time is None:
1589 continue
1590 if start_time > duration:
1591 break
1592 end_time = (duration if next_num == len(chapter_lines)
1593 else parse_duration(chapter_lines[next_num][1]))
1594 if end_time is None:
1595 continue
1596 if end_time > duration:
1597 end_time = duration
1598 if start_time > end_time:
1599 break
1600 chapter_title = re.sub(
1601 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1602 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1603 chapters.append({
1604 'start_time': start_time,
1605 'end_time': end_time,
1606 'title': chapter_title,
1607 })
1608 return chapters
1609
1610 def _extract_chapters(self, webpage, description, video_id, duration):
1611 return (self._extract_chapters_from_json(webpage, video_id, duration)
1612 or self._extract_chapters_from_description(description, duration))
1613
1614 def _real_extract(self, url):
1615 url, smuggled_data = unsmuggle_url(url, {})
1616
1617 proto = (
1618 'http' if self._downloader.params.get('prefer_insecure', False)
1619 else 'https')
1620
1621 start_time = None
1622 end_time = None
1623 parsed_url = compat_urllib_parse_urlparse(url)
1624 for component in [parsed_url.fragment, parsed_url.query]:
1625 query = compat_parse_qs(component)
1626 if start_time is None and 't' in query:
1627 start_time = parse_duration(query['t'][0])
1628 if start_time is None and 'start' in query:
1629 start_time = parse_duration(query['start'][0])
1630 if end_time is None and 'end' in query:
1631 end_time = parse_duration(query['end'][0])
1632
1633 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1634 mobj = re.search(self._NEXT_URL_RE, url)
1635 if mobj:
1636 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1637 video_id = self.extract_id(url)
1638
1639 # Get video webpage
1640 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1641 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1642
1643 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1644 video_id = qs.get('v', [None])[0] or video_id
1645
1646 # Attempt to extract SWF player URL
1647 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1648 if mobj is not None:
1649 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1650 else:
1651 player_url = None
1652
1653 dash_mpds = []
1654
1655 def add_dash_mpd(video_info):
1656 dash_mpd = video_info.get('dashmpd')
1657 if dash_mpd and dash_mpd[0] not in dash_mpds:
1658 dash_mpds.append(dash_mpd[0])
1659
1660 def add_dash_mpd_pr(pl_response):
1661 dash_mpd = url_or_none(try_get(
1662 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1663 compat_str))
1664 if dash_mpd and dash_mpd not in dash_mpds:
1665 dash_mpds.append(dash_mpd)
1666
1667 is_live = None
1668 view_count = None
1669
1670 def extract_view_count(v_info):
1671 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1672
1673 def extract_player_response(player_response, video_id):
1674 pl_response = str_or_none(player_response)
1675 if not pl_response:
1676 return
1677 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1678 if isinstance(pl_response, dict):
1679 add_dash_mpd_pr(pl_response)
1680 return pl_response
1681
1682 def extract_embedded_config(embed_webpage, video_id):
1683 embedded_config = self._search_regex(
1684 r'setConfig\(({.*})\);',
1685 embed_webpage, 'ytInitialData', default=None)
1686 if embedded_config:
1687 return embedded_config
1688
1689 video_info = {}
1690 player_response = {}
1691 ytplayer_config = None
1692 embed_webpage = None
1693
1694 # Get video info
1695 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1696 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1697 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1698 age_gate = True
1699 # We simulate the access to the video from www.youtube.com/v/{video_id}
1700 # this can be viewed without login into Youtube
1701 url = proto + '://www.youtube.com/embed/%s' % video_id
1702 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1703 ext = extract_embedded_config(embed_webpage, video_id)
1704 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1705 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1706 if not playable_in_embed:
1707 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1708 playable_in_embed = ''
1709 else:
1710 playable_in_embed = playable_in_embed.group('playableinEmbed')
1711 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1712 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1713 if playable_in_embed == 'false':
1714 '''
1715 # TODO apply this patch when Support for Python 2.6(!) and above drops
1716 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1717 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1718 '''
1719 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1720 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1721 age_gate = False
1722 # Try looking directly into the video webpage
1723 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1724 if ytplayer_config:
1725 args = ytplayer_config.get("args")
1726 if args is not None:
1727 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1728 # Convert to the same format returned by compat_parse_qs
1729 video_info = dict((k, [v]) for k, v in args.items())
1730 add_dash_mpd(video_info)
1731 # Rental video is not rented but preview is available (e.g.
1732 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1733 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1734 if not video_info and args.get('ypc_vid'):
1735 return self.url_result(
1736 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1737 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1738 is_live = True
1739 if not player_response:
1740 player_response = extract_player_response(args.get('player_response'), video_id)
1741 elif not player_response:
1742 player_response = ytplayer_config
1743 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1744 add_dash_mpd_pr(player_response)
1745 else:
1746 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1747 else:
1748 data = compat_urllib_parse_urlencode({
1749 'video_id': video_id,
1750 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1751 'sts': self._search_regex(
1752 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1753 })
1754 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1755 try:
1756 video_info_webpage = self._download_webpage(
1757 video_info_url, video_id,
1758 note='Refetching age-gated info webpage',
1759 errnote='unable to download video info webpage')
1760 except ExtractorError:
1761 video_info_webpage = None
1762 if video_info_webpage:
1763 video_info = compat_parse_qs(video_info_webpage)
1764 pl_response = video_info.get('player_response', [None])[0]
1765 player_response = extract_player_response(pl_response, video_id)
1766 add_dash_mpd(video_info)
1767 view_count = extract_view_count(video_info)
1768 else:
1769 age_gate = False
1770 # Try looking directly into the video webpage
1771 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1772 if ytplayer_config:
1773 args = ytplayer_config.get('args', {})
1774 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1775 # Convert to the same format returned by compat_parse_qs
1776 video_info = dict((k, [v]) for k, v in args.items())
1777 add_dash_mpd(video_info)
1778 # Rental video is not rented but preview is available (e.g.
1779 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1780 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1781 if not video_info and args.get('ypc_vid'):
1782 return self.url_result(
1783 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1784 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1785 is_live = True
1786 if not player_response:
1787 player_response = extract_player_response(args.get('player_response'), video_id)
1788 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1789 add_dash_mpd_pr(player_response)
1790
1791 if not video_info and not player_response:
1792 player_response = extract_player_response(
1793 self._search_regex(
1794 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1795 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1796 'initial player response', default='{}'),
1797 video_id)
1798
1799 def extract_unavailable_message():
1800 messages = []
1801 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1802 msg = self._html_search_regex(
1803 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1804 video_webpage, 'unavailable %s' % kind, default=None)
1805 if msg:
1806 messages.append(msg)
1807 if messages:
1808 return '\n'.join(messages)
1809
1810 if not video_info and not player_response:
1811 unavailable_message = extract_unavailable_message()
1812 if not unavailable_message:
1813 unavailable_message = 'Unable to extract video data'
1814 raise ExtractorError(
1815 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1816
1817 if not isinstance(video_info, dict):
1818 video_info = {}
1819
1820 playable_in_embed = try_get(
1821 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1822
1823 video_details = try_get(
1824 player_response, lambda x: x['videoDetails'], dict) or {}
1825
1826 microformat = try_get(
1827 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1828
1829 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1830 if not video_title:
1831 self._downloader.report_warning('Unable to extract video title')
1832 video_title = '_'
1833
1834 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1835 if video_description:
1836
1837 def replace_url(m):
1838 redir_url = compat_urlparse.urljoin(url, m.group(1))
1839 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1840 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1841 qs = compat_parse_qs(parsed_redir_url.query)
1842 q = qs.get('q')
1843 if q and q[0]:
1844 return q[0]
1845 return redir_url
1846
1847 description_original = video_description = re.sub(r'''(?x)
1848 <a\s+
1849 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1850 (?:title|href)="([^"]+)"\s+
1851 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1852 class="[^"]*"[^>]*>
1853 [^<]+\.{3}\s*
1854 </a>
1855 ''', replace_url, video_description)
1856 video_description = clean_html(video_description)
1857 else:
1858 video_description = video_details.get('shortDescription')
1859 if video_description is None:
1860 video_description = self._html_search_meta('description', video_webpage)
1861
1862 if not smuggled_data.get('force_singlefeed', False):
1863 if not self._downloader.params.get('noplaylist'):
1864 multifeed_metadata_list = try_get(
1865 player_response,
1866 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1867 compat_str) or try_get(
1868 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1869 if multifeed_metadata_list:
1870 entries = []
1871 feed_ids = []
1872 for feed in multifeed_metadata_list.split(','):
1873 # Unquote should take place before split on comma (,) since textual
1874 # fields may contain comma as well (see
1875 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1876 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1877
1878 def feed_entry(name):
1879 return try_get(feed_data, lambda x: x[name][0], compat_str)
1880
1881 feed_id = feed_entry('id')
1882 if not feed_id:
1883 continue
1884 feed_title = feed_entry('title')
1885 title = video_title
1886 if feed_title:
1887 title += ' (%s)' % feed_title
1888 entries.append({
1889 '_type': 'url_transparent',
1890 'ie_key': 'Youtube',
1891 'url': smuggle_url(
1892 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1893 {'force_singlefeed': True}),
1894 'title': title,
1895 })
1896 feed_ids.append(feed_id)
1897 self.to_screen(
1898 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1899 % (', '.join(feed_ids), video_id))
1900 return self.playlist_result(entries, video_id, video_title, video_description)
1901 else:
1902 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1903
1904 if view_count is None:
1905 view_count = extract_view_count(video_info)
1906 if view_count is None and video_details:
1907 view_count = int_or_none(video_details.get('viewCount'))
1908 if view_count is None and microformat:
1909 view_count = int_or_none(microformat.get('viewCount'))
1910
1911 if is_live is None:
1912 is_live = bool_or_none(video_details.get('isLive'))
1913
1914 has_live_chat_replay = False
1915 if not is_live:
1916 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1917 try:
1918 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1919 has_live_chat_replay = True
1920 except (KeyError, IndexError, TypeError):
1921 pass
1922
1923 # Check for "rental" videos
1924 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1925 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1926
1927 def _extract_filesize(media_url):
1928 return int_or_none(self._search_regex(
1929 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1930
1931 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1932 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1933
1934 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1935 self.report_rtmp_download()
1936 formats = [{
1937 'format_id': '_rtmp',
1938 'protocol': 'rtmp',
1939 'url': video_info['conn'][0],
1940 'player_url': player_url,
1941 }]
1942 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1943 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1944 if 'rtmpe%3Dyes' in encoded_url_map:
1945 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1946 formats = []
1947 formats_spec = {}
1948 fmt_list = video_info.get('fmt_list', [''])[0]
1949 if fmt_list:
1950 for fmt in fmt_list.split(','):
1951 spec = fmt.split('/')
1952 if len(spec) > 1:
1953 width_height = spec[1].split('x')
1954 if len(width_height) == 2:
1955 formats_spec[spec[0]] = {
1956 'resolution': spec[1],
1957 'width': int_or_none(width_height[0]),
1958 'height': int_or_none(width_height[1]),
1959 }
1960 for fmt in streaming_formats:
1961 itag = str_or_none(fmt.get('itag'))
1962 if not itag:
1963 continue
1964 quality = fmt.get('quality')
1965 quality_label = fmt.get('qualityLabel') or quality
1966 formats_spec[itag] = {
1967 'asr': int_or_none(fmt.get('audioSampleRate')),
1968 'filesize': int_or_none(fmt.get('contentLength')),
1969 'format_note': quality_label,
1970 'fps': int_or_none(fmt.get('fps')),
1971 'height': int_or_none(fmt.get('height')),
1972 # bitrate for itag 43 is always 2147483647
1973 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1974 'width': int_or_none(fmt.get('width')),
1975 }
1976
1977 for fmt in streaming_formats:
1978 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1979 continue
1980 url = url_or_none(fmt.get('url'))
1981
1982 if not url:
1983 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1984 if not cipher:
1985 continue
1986 url_data = compat_parse_qs(cipher)
1987 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1988 if not url:
1989 continue
1990 else:
1991 cipher = None
1992 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1993
1994 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1995 # Unsupported FORMAT_STREAM_TYPE_OTF
1996 if stream_type == 3:
1997 continue
1998
1999 format_id = fmt.get('itag') or url_data['itag'][0]
2000 if not format_id:
2001 continue
2002 format_id = compat_str(format_id)
2003
2004 if cipher:
2005 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2006 ASSETS_RE = (
2007 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2008 r'"jsUrl"\s*:\s*("[^"]+")',
2009 r'"assets":.+?"js":\s*("[^"]+")')
2010 jsplayer_url_json = self._search_regex(
2011 ASSETS_RE,
2012 embed_webpage if age_gate else video_webpage,
2013 'JS player URL (1)', default=None)
2014 if not jsplayer_url_json and not age_gate:
2015 # We need the embed website after all
2016 if embed_webpage is None:
2017 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2018 embed_webpage = self._download_webpage(
2019 embed_url, video_id, 'Downloading embed webpage')
2020 jsplayer_url_json = self._search_regex(
2021 ASSETS_RE, embed_webpage, 'JS player URL')
2022
2023 player_url = json.loads(jsplayer_url_json)
2024 if player_url is None:
2025 player_url_json = self._search_regex(
2026 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2027 video_webpage, 'age gate player URL')
2028 player_url = json.loads(player_url_json)
2029
2030 if 'sig' in url_data:
2031 url += '&signature=' + url_data['sig'][0]
2032 elif 's' in url_data:
2033 encrypted_sig = url_data['s'][0]
2034
2035 if self._downloader.params.get('verbose'):
2036 if player_url is None:
2037 player_desc = 'unknown'
2038 else:
2039 player_type, player_version = self._extract_player_info(player_url)
2040 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2041 parts_sizes = self._signature_cache_id(encrypted_sig)
2042 self.to_screen('{%s} signature length %s, %s' %
2043 (format_id, parts_sizes, player_desc))
2044
2045 signature = self._decrypt_signature(
2046 encrypted_sig, video_id, player_url, age_gate)
2047 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2048 url += '&%s=%s' % (sp, signature)
2049 if 'ratebypass' not in url:
2050 url += '&ratebypass=yes'
2051
2052 dct = {
2053 'format_id': format_id,
2054 'url': url,
2055 'player_url': player_url,
2056 }
2057 if format_id in self._formats:
2058 dct.update(self._formats[format_id])
2059 if format_id in formats_spec:
2060 dct.update(formats_spec[format_id])
2061
2062 # Some itags are not included in DASH manifest thus corresponding formats will
2063 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2064 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2065 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2066 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2067
2068 if width is None:
2069 width = int_or_none(fmt.get('width'))
2070 if height is None:
2071 height = int_or_none(fmt.get('height'))
2072
2073 filesize = int_or_none(url_data.get(
2074 'clen', [None])[0]) or _extract_filesize(url)
2075
2076 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2077 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2078
2079 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2080 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2081 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2082
2083 more_fields = {
2084 'filesize': filesize,
2085 'tbr': tbr,
2086 'width': width,
2087 'height': height,
2088 'fps': fps,
2089 'format_note': quality_label or quality,
2090 }
2091 for key, value in more_fields.items():
2092 if value:
2093 dct[key] = value
2094 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2095 if type_:
2096 type_split = type_.split(';')
2097 kind_ext = type_split[0].split('/')
2098 if len(kind_ext) == 2:
2099 kind, _ = kind_ext
2100 dct['ext'] = mimetype2ext(type_split[0])
2101 if kind in ('audio', 'video'):
2102 codecs = None
2103 for mobj in re.finditer(
2104 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2105 if mobj.group('key') == 'codecs':
2106 codecs = mobj.group('val')
2107 break
2108 if codecs:
2109 dct.update(parse_codecs(codecs))
2110 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2111 dct['downloader_options'] = {
2112 # Youtube throttles chunks >~10M
2113 'http_chunk_size': 10485760,
2114 }
2115 formats.append(dct)
2116 else:
2117 manifest_url = (
2118 url_or_none(try_get(
2119 player_response,
2120 lambda x: x['streamingData']['hlsManifestUrl'],
2121 compat_str))
2122 or url_or_none(try_get(
2123 video_info, lambda x: x['hlsvp'][0], compat_str)))
2124 if manifest_url:
2125 formats = []
2126 m3u8_formats = self._extract_m3u8_formats(
2127 manifest_url, video_id, 'mp4', fatal=False)
2128 for a_format in m3u8_formats:
2129 itag = self._search_regex(
2130 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2131 if itag:
2132 a_format['format_id'] = itag
2133 if itag in self._formats:
2134 dct = self._formats[itag].copy()
2135 dct.update(a_format)
2136 a_format = dct
2137 a_format['player_url'] = player_url
2138 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2139 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2140 if self._downloader.params.get('youtube_include_hls_manifest', True):
2141 formats.append(a_format)
2142 else:
2143 error_message = extract_unavailable_message()
2144 if not error_message:
2145 reason_list = try_get(
2146 player_response,
2147 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2148 list) or []
2149 for reason in reason_list:
2150 if not isinstance(reason, dict):
2151 continue
2152 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2153 if reason_text:
2154 if not error_message:
2155 error_message = ''
2156 error_message += reason_text
2157 if error_message:
2158 error_message = clean_html(error_message)
2159 if not error_message:
2160 error_message = clean_html(try_get(
2161 player_response, lambda x: x['playabilityStatus']['reason'],
2162 compat_str))
2163 if not error_message:
2164 error_message = clean_html(
2165 try_get(video_info, lambda x: x['reason'][0], compat_str))
2166 if error_message:
2167 raise ExtractorError(error_message, expected=True)
2168 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2169
2170 # uploader
2171 video_uploader = try_get(
2172 video_info, lambda x: x['author'][0],
2173 compat_str) or str_or_none(video_details.get('author'))
2174 if video_uploader:
2175 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2176 else:
2177 self._downloader.report_warning('unable to extract uploader name')
2178
2179 # uploader_id
2180 video_uploader_id = None
2181 video_uploader_url = None
2182 mobj = re.search(
2183 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2184 video_webpage)
2185 if mobj is not None:
2186 video_uploader_id = mobj.group('uploader_id')
2187 video_uploader_url = mobj.group('uploader_url')
2188 else:
2189 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2190 if owner_profile_url:
2191 video_uploader_id = self._search_regex(
2192 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2193 default=None)
2194 video_uploader_url = owner_profile_url
2195
2196 channel_id = (
2197 str_or_none(video_details.get('channelId'))
2198 or self._html_search_meta(
2199 'channelId', video_webpage, 'channel id', default=None)
2200 or self._search_regex(
2201 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2202 video_webpage, 'channel id', default=None, group='id'))
2203 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2204
2205 thumbnails = []
2206 thumbnails_list = try_get(
2207 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2208 for t in thumbnails_list:
2209 if not isinstance(t, dict):
2210 continue
2211 thumbnail_url = url_or_none(t.get('url'))
2212 if not thumbnail_url:
2213 continue
2214 thumbnails.append({
2215 'url': thumbnail_url,
2216 'width': int_or_none(t.get('width')),
2217 'height': int_or_none(t.get('height')),
2218 })
2219
2220 if not thumbnails:
2221 video_thumbnail = None
2222 # We try first to get a high quality image:
2223 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2224 video_webpage, re.DOTALL)
2225 if m_thumb is not None:
2226 video_thumbnail = m_thumb.group(1)
2227 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2228 if thumbnail_url:
2229 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2230 if video_thumbnail:
2231 thumbnails.append({'url': video_thumbnail})
2232
2233 # upload date
2234 upload_date = self._html_search_meta(
2235 'datePublished', video_webpage, 'upload date', default=None)
2236 if not upload_date:
2237 upload_date = self._search_regex(
2238 [r'(?s)id="eow-date.*?>(.*?)</span>',
2239 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2240 video_webpage, 'upload date', default=None)
2241 if not upload_date:
2242 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2243 upload_date = unified_strdate(upload_date)
2244
2245 video_license = self._html_search_regex(
2246 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2247 video_webpage, 'license', default=None)
2248
2249 m_music = re.search(
2250 r'''(?x)
2251 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2252 <ul[^>]*>\s*
2253 <li>(?P<title>.+?)
2254 by (?P<creator>.+?)
2255 (?:
2256 \(.+?\)|
2257 <a[^>]*
2258 (?:
2259 \bhref=["\']/red[^>]*>| # drop possible
2260 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2261 )
2262 .*?
2263 )?</li
2264 ''',
2265 video_webpage)
2266 if m_music:
2267 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2268 video_creator = clean_html(m_music.group('creator'))
2269 else:
2270 video_alt_title = video_creator = None
2271
2272 def extract_meta(field):
2273 return self._html_search_regex(
2274 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2275 video_webpage, field, default=None)
2276
2277 track = extract_meta('Song')
2278 artist = extract_meta('Artist')
2279 album = extract_meta('Album')
2280
2281 # Youtube Music Auto-generated description
2282 release_date = release_year = None
2283 if video_description:
2284 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2285 if mobj:
2286 if not track:
2287 track = mobj.group('track').strip()
2288 if not artist:
2289 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2290 if not album:
2291 album = mobj.group('album'.strip())
2292 release_year = mobj.group('release_year')
2293 release_date = mobj.group('release_date')
2294 if release_date:
2295 release_date = release_date.replace('-', '')
2296 if not release_year:
2297 release_year = int(release_date[:4])
2298 if release_year:
2299 release_year = int(release_year)
2300
2301 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2302 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2303 for content in contents:
2304 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2305 multiple_songs = False
2306 for row in rows:
2307 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2308 multiple_songs = True
2309 break
2310 for row in rows:
2311 mrr = row.get('metadataRowRenderer') or {}
2312 mrr_title = try_get(
2313 mrr, lambda x: x['title']['simpleText'], compat_str)
2314 mrr_contents = try_get(
2315 mrr, lambda x: x['contents'][0], dict) or {}
2316 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2317 if not (mrr_title and mrr_contents_text):
2318 continue
2319 if mrr_title == 'License':
2320 video_license = mrr_contents_text
2321 elif not multiple_songs:
2322 if mrr_title == 'Album':
2323 album = mrr_contents_text
2324 elif mrr_title == 'Artist':
2325 artist = mrr_contents_text
2326 elif mrr_title == 'Song':
2327 track = mrr_contents_text
2328
2329 m_episode = re.search(
2330 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2331 video_webpage)
2332 if m_episode:
2333 series = unescapeHTML(m_episode.group('series'))
2334 season_number = int(m_episode.group('season'))
2335 episode_number = int(m_episode.group('episode'))
2336 else:
2337 series = season_number = episode_number = None
2338
2339 m_cat_container = self._search_regex(
2340 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2341 video_webpage, 'categories', default=None)
2342 category = None
2343 if m_cat_container:
2344 category = self._html_search_regex(
2345 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2346 default=None)
2347 if not category:
2348 category = try_get(
2349 microformat, lambda x: x['category'], compat_str)
2350 video_categories = None if category is None else [category]
2351
2352 video_tags = [
2353 unescapeHTML(m.group('content'))
2354 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2355 if not video_tags:
2356 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2357
2358 def _extract_count(count_name):
2359 return str_to_int(self._search_regex(
2360 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2361 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2362 video_webpage, count_name, default=None))
2363
2364 like_count = _extract_count('like')
2365 dislike_count = _extract_count('dislike')
2366
2367 if view_count is None:
2368 view_count = str_to_int(self._search_regex(
2369 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2370 'view count', default=None))
2371
2372 average_rating = (
2373 float_or_none(video_details.get('averageRating'))
2374 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2375
2376 # subtitles
2377 video_subtitles = self.extract_subtitles(
2378 video_id, video_webpage, has_live_chat_replay)
2379 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2380
2381 video_duration = try_get(
2382 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2383 if not video_duration:
2384 video_duration = int_or_none(video_details.get('lengthSeconds'))
2385 if not video_duration:
2386 video_duration = parse_duration(self._html_search_meta(
2387 'duration', video_webpage, 'video duration'))
2388
2389 # Get Subscriber Count of channel
2390 subscriber_count = parse_count(self._search_regex(
2391 r'"text":"([\d\.]+\w?) subscribers"',
2392 video_webpage,
2393 'subscriber count',
2394 default=None
2395 ))
2396
2397 # annotations
2398 video_annotations = None
2399 if self._downloader.params.get('writeannotations', False):
2400 xsrf_token = None
2401 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2402 if ytcfg:
2403 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2404 if not xsrf_token:
2405 xsrf_token = self._search_regex(
2406 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2407 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2408 invideo_url = try_get(
2409 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2410 if xsrf_token and invideo_url:
2411 xsrf_field_name = None
2412 if ytcfg:
2413 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2414 if not xsrf_field_name:
2415 xsrf_field_name = self._search_regex(
2416 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2417 video_webpage, 'xsrf field name',
2418 group='xsrf_field_name', default='session_token')
2419 video_annotations = self._download_webpage(
2420 self._proto_relative_url(invideo_url),
2421 video_id, note='Downloading annotations',
2422 errnote='Unable to download video annotations', fatal=False,
2423 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2424
2425 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2426
2427 # Look for the DASH manifest
2428 if self._downloader.params.get('youtube_include_dash_manifest', True):
2429 dash_mpd_fatal = True
2430 for mpd_url in dash_mpds:
2431 dash_formats = {}
2432 try:
2433 def decrypt_sig(mobj):
2434 s = mobj.group(1)
2435 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2436 return '/signature/%s' % dec_s
2437
2438 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2439
2440 for df in self._extract_mpd_formats(
2441 mpd_url, video_id, fatal=dash_mpd_fatal,
2442 formats_dict=self._formats):
2443 if not df.get('filesize'):
2444 df['filesize'] = _extract_filesize(df['url'])
2445 # Do not overwrite DASH format found in some previous DASH manifest
2446 if df['format_id'] not in dash_formats:
2447 dash_formats[df['format_id']] = df
2448 # Additional DASH manifests may end up in HTTP Error 403 therefore
2449 # allow them to fail without bug report message if we already have
2450 # some DASH manifest succeeded. This is temporary workaround to reduce
2451 # burst of bug reports until we figure out the reason and whether it
2452 # can be fixed at all.
2453 dash_mpd_fatal = False
2454 except (ExtractorError, KeyError) as e:
2455 self.report_warning(
2456 'Skipping DASH manifest: %r' % e, video_id)
2457 if dash_formats:
2458 # Remove the formats we found through non-DASH, they
2459 # contain less info and it can be wrong, because we use
2460 # fixed values (for example the resolution). See
2461 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2462 # example.
2463 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2464 formats.extend(dash_formats.values())
2465
2466 # Check for malformed aspect ratio
2467 stretched_m = re.search(
2468 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2469 video_webpage)
2470 if stretched_m:
2471 w = float(stretched_m.group('w'))
2472 h = float(stretched_m.group('h'))
2473 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2474 # We will only process correct ratios.
2475 if w > 0 and h > 0:
2476 ratio = w / h
2477 for f in formats:
2478 if f.get('vcodec') != 'none':
2479 f['stretched_ratio'] = ratio
2480
2481 if not formats:
2482 if 'reason' in video_info:
2483 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2484 regions_allowed = self._html_search_meta(
2485 'regionsAllowed', video_webpage, default=None)
2486 countries = regions_allowed.split(',') if regions_allowed else None
2487 self.raise_geo_restricted(
2488 msg=video_info['reason'][0], countries=countries)
2489 reason = video_info['reason'][0]
2490 if 'Invalid parameters' in reason:
2491 unavailable_message = extract_unavailable_message()
2492 if unavailable_message:
2493 reason = unavailable_message
2494 raise ExtractorError(
2495 'YouTube said: %s' % reason,
2496 expected=True, video_id=video_id)
2497 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2498 raise ExtractorError('This video is DRM protected.', expected=True)
2499
2500 self._sort_formats(formats)
2501
2502 self.mark_watched(video_id, video_info, player_response)
2503
2504 return {
2505 'id': video_id,
2506 'uploader': video_uploader,
2507 'uploader_id': video_uploader_id,
2508 'uploader_url': video_uploader_url,
2509 'channel_id': channel_id,
2510 'channel_url': channel_url,
2511 'upload_date': upload_date,
2512 'license': video_license,
2513 'creator': video_creator or artist,
2514 'title': video_title,
2515 'alt_title': video_alt_title or track,
2516 'thumbnails': thumbnails,
2517 'description': video_description,
2518 'categories': video_categories,
2519 'tags': video_tags,
2520 'subtitles': video_subtitles,
2521 'automatic_captions': automatic_captions,
2522 'duration': video_duration,
2523 'age_limit': 18 if age_gate else 0,
2524 'annotations': video_annotations,
2525 'chapters': chapters,
2526 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2527 'view_count': view_count,
2528 'like_count': like_count,
2529 'dislike_count': dislike_count,
2530 'average_rating': average_rating,
2531 'formats': formats,
2532 'is_live': is_live,
2533 'start_time': start_time,
2534 'end_time': end_time,
2535 'series': series,
2536 'season_number': season_number,
2537 'episode_number': episode_number,
2538 'track': track,
2539 'artist': artist,
2540 'album': album,
2541 'release_date': release_date,
2542 'release_year': release_year,
2543 'subscriber_count': subscriber_count,
2544 'playable_in_embed': playable_in_embed,
2545 }
2546
2547
2548class YoutubeTabIE(YoutubeBaseInfoExtractor):
2549 IE_DESC = 'YouTube.com tab'
2550 _VALID_URL = r'''(?x)
2551 https?://
2552 (?:\w+\.)?
2553 (?:
2554 youtube(?:kids)?\.com|
2555 invidio\.us
2556 )/
2557 (?:
2558 (?:channel|c|user)/|
2559 (?P<not_channel>
2560 feed/|
2561 (?:playlist|watch)\?.*?\blist=
2562 )|
2563 (?!(?:%s)\b) # Direct URLs
2564 )
2565 (?P<id>[^/?\#&]+)
2566 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2567 IE_NAME = 'youtube:tab'
2568
2569 _TESTS = [{
2570 # playlists, multipage
2571 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2572 'playlist_mincount': 94,
2573 'info_dict': {
2574 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2575 'title': 'Игорь Клейнер - Playlists',
2576 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2577 },
2578 }, {
2579 # playlists, multipage, different order
2580 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2581 'playlist_mincount': 94,
2582 'info_dict': {
2583 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2584 'title': 'Игорь Клейнер - Playlists',
2585 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2586 },
2587 }, {
2588 # playlists, singlepage
2589 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2590 'playlist_mincount': 4,
2591 'info_dict': {
2592 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2593 'title': 'ThirstForScience - Playlists',
2594 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2595 }
2596 }, {
2597 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2598 'only_matching': True,
2599 }, {
2600 # basic, single video playlist
2601 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2602 'info_dict': {
2603 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2604 'uploader': 'Sergey M.',
2605 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2606 'title': 'youtube-dl public playlist',
2607 },
2608 'playlist_count': 1,
2609 }, {
2610 # empty playlist
2611 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2612 'info_dict': {
2613 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2614 'uploader': 'Sergey M.',
2615 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2616 'title': 'youtube-dl empty playlist',
2617 },
2618 'playlist_count': 0,
2619 }, {
2620 # Home tab
2621 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2622 'info_dict': {
2623 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2624 'title': 'lex will - Home',
2625 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2626 },
2627 'playlist_mincount': 2,
2628 }, {
2629 # Videos tab
2630 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2631 'info_dict': {
2632 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2633 'title': 'lex will - Videos',
2634 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2635 },
2636 'playlist_mincount': 975,
2637 }, {
2638 # Videos tab, sorted by popular
2639 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2640 'info_dict': {
2641 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2642 'title': 'lex will - Videos',
2643 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2644 },
2645 'playlist_mincount': 199,
2646 }, {
2647 # Playlists tab
2648 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2649 'info_dict': {
2650 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2651 'title': 'lex will - Playlists',
2652 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2653 },
2654 'playlist_mincount': 17,
2655 }, {
2656 # Community tab
2657 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2658 'info_dict': {
2659 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2660 'title': 'lex will - Community',
2661 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2662 },
2663 'playlist_mincount': 18,
2664 }, {
2665 # Channels tab
2666 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2667 'info_dict': {
2668 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2669 'title': 'lex will - Channels',
2670 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2671 },
2672 'playlist_mincount': 138,
2673 }, {
2674 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2675 'only_matching': True,
2676 }, {
2677 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2678 'only_matching': True,
2679 }, {
2680 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2681 'only_matching': True,
2682 }, {
2683 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2684 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2685 'info_dict': {
2686 'title': '29C3: Not my department',
2687 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2688 'uploader': 'Christiaan008',
2689 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2690 },
2691 'playlist_count': 96,
2692 }, {
2693 'note': 'Large playlist',
2694 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2695 'info_dict': {
2696 'title': 'Uploads from Cauchemar',
2697 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2698 'uploader': 'Cauchemar',
2699 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2700 },
2701 'playlist_mincount': 1123,
2702 }, {
2703 # even larger playlist, 8832 videos
2704 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2705 'only_matching': True,
2706 }, {
2707 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2708 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2709 'info_dict': {
2710 'title': 'Uploads from Interstellar Movie',
2711 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2712 'uploader': 'Interstellar Movie',
2713 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2714 },
2715 'playlist_mincount': 21,
2716 }, {
2717 # https://github.com/ytdl-org/youtube-dl/issues/21844
2718 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2719 'info_dict': {
2720 'title': 'Data Analysis with Dr Mike Pound',
2721 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2722 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2723 'uploader': 'Computerphile',
2724 },
2725 'playlist_mincount': 11,
2726 }, {
2727 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2728 'only_matching': True,
2729 }, {
2730 # Playlist URL that does not actually serve a playlist
2731 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2732 'info_dict': {
2733 'id': 'FqZTN594JQw',
2734 'ext': 'webm',
2735 'title': "Smiley's People 01 detective, Adventure Series, Action",
2736 'uploader': 'STREEM',
2737 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2739 'upload_date': '20150526',
2740 'license': 'Standard YouTube License',
2741 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2742 'categories': ['People & Blogs'],
2743 'tags': list,
2744 'view_count': int,
2745 'like_count': int,
2746 'dislike_count': int,
2747 },
2748 'params': {
2749 'skip_download': True,
2750 },
2751 'skip': 'This video is not available.',
2752 'add_ie': [YoutubeIE.ie_key()],
2753 }, {
2754 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2755 'only_matching': True,
2756 }, {
2757 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2758 'only_matching': True,
2759 }, {
2760 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2761 'info_dict': {
2762 'id': '9Auq9mYxFEE',
2763 'ext': 'mp4',
2764 'title': 'Watch Sky News live',
2765 'uploader': 'Sky News',
2766 'uploader_id': 'skynews',
2767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2768 'upload_date': '20191102',
2769 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2770 'categories': ['News & Politics'],
2771 'tags': list,
2772 'like_count': int,
2773 'dislike_count': int,
2774 },
2775 'params': {
2776 'skip_download': True,
2777 },
2778 }, {
2779 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2780 'info_dict': {
2781 'id': 'a48o2S1cPoo',
2782 'ext': 'mp4',
2783 'title': 'The Young Turks - Live Main Show',
2784 'uploader': 'The Young Turks',
2785 'uploader_id': 'TheYoungTurks',
2786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2787 'upload_date': '20150715',
2788 'license': 'Standard YouTube License',
2789 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2790 'categories': ['News & Politics'],
2791 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2792 'like_count': int,
2793 'dislike_count': int,
2794 },
2795 'params': {
2796 'skip_download': True,
2797 },
2798 'only_matching': True,
2799 }, {
2800 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2801 'only_matching': True,
2802 }, {
2803 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2804 'only_matching': True,
2805 }, {
2806 'url': 'https://www.youtube.com/feed/trending',
2807 'only_matching': True,
2808 }, {
2809 # needs auth
2810 'url': 'https://www.youtube.com/feed/library',
2811 'only_matching': True,
2812 }, {
2813 # needs auth
2814 'url': 'https://www.youtube.com/feed/history',
2815 'only_matching': True,
2816 }, {
2817 # needs auth
2818 'url': 'https://www.youtube.com/feed/subscriptions',
2819 'only_matching': True,
2820 }, {
2821 # needs auth
2822 'url': 'https://www.youtube.com/feed/watch_later',
2823 'only_matching': True,
2824 }, {
2825 # no longer available?
2826 'url': 'https://www.youtube.com/feed/recommended',
2827 'only_matching': True,
2828 }, {
2829 # inline playlist with not always working continuations
2830 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2831 'only_matching': True,
2832 }, {
2833 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2834 'only_matching': True,
2835 }, {
2836 'url': 'https://www.youtube.com/course',
2837 'only_matching': True,
2838 }, {
2839 'url': 'https://www.youtube.com/zsecurity',
2840 'only_matching': True,
2841 }, {
2842 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2843 'only_matching': True,
2844 }, {
2845 'url': 'https://www.youtube.com/TheYoungTurks/live',
2846 'only_matching': True,
2847 }]
2848
2849 @classmethod
2850 def suitable(cls, url):
2851 return False if YoutubeIE.suitable(url) else super(
2852 YoutubeTabIE, cls).suitable(url)
2853
2854 def _extract_channel_id(self, webpage):
2855 channel_id = self._html_search_meta(
2856 'channelId', webpage, 'channel id', default=None)
2857 if channel_id:
2858 return channel_id
2859 channel_url = self._html_search_meta(
2860 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2861 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2862 'twitter:app:url:googleplay'), webpage, 'channel url')
2863 return self._search_regex(
2864 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2865 channel_url, 'channel id')
2866
2867 @staticmethod
2868 def _extract_grid_item_renderer(item):
2869 for item_kind in ('Playlist', 'Video', 'Channel'):
2870 renderer = item.get('grid%sRenderer' % item_kind)
2871 if renderer:
2872 return renderer
2873
2874 def _extract_video(self, renderer):
2875 video_id = renderer.get('videoId')
2876 title = try_get(
2877 renderer,
2878 (lambda x: x['title']['runs'][0]['text'],
2879 lambda x: x['title']['simpleText']), compat_str)
2880 description = try_get(
2881 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2882 compat_str)
2883 duration = parse_duration(try_get(
2884 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2885 view_count_text = try_get(
2886 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2887 view_count = str_to_int(self._search_regex(
2888 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2889 'view count', default=None))
2890 uploader = try_get(
2891 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2892 return {
2893 '_type': 'url_transparent',
2894 'ie_key': YoutubeIE.ie_key(),
2895 'id': video_id,
2896 'url': video_id,
2897 'title': title,
2898 'description': description,
2899 'duration': duration,
2900 'view_count': view_count,
2901 'uploader': uploader,
2902 }
2903
2904 def _grid_entries(self, grid_renderer):
2905 for item in grid_renderer['items']:
2906 if not isinstance(item, dict):
2907 continue
2908 renderer = self._extract_grid_item_renderer(item)
2909 if not isinstance(renderer, dict):
2910 continue
2911 title = try_get(
2912 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2913 # playlist
2914 playlist_id = renderer.get('playlistId')
2915 if playlist_id:
2916 yield self.url_result(
2917 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2918 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2919 video_title=title)
2920 # video
2921 video_id = renderer.get('videoId')
2922 if video_id:
2923 yield self._extract_video(renderer)
2924 # channel
2925 channel_id = renderer.get('channelId')
2926 if channel_id:
2927 title = try_get(
2928 renderer, lambda x: x['title']['simpleText'], compat_str)
2929 yield self.url_result(
2930 'https://www.youtube.com/channel/%s' % channel_id,
2931 ie=YoutubeTabIE.ie_key(), video_title=title)
2932
2933 def _shelf_entries_from_content(self, shelf_renderer):
2934 content = shelf_renderer.get('content')
2935 if not isinstance(content, dict):
2936 return
2937 renderer = content.get('gridRenderer')
2938 if renderer:
2939 # TODO: add support for nested playlists so each shelf is processed
2940 # as separate playlist
2941 # TODO: this includes only first N items
2942 for entry in self._grid_entries(renderer):
2943 yield entry
2944 renderer = content.get('horizontalListRenderer')
2945 if renderer:
2946 # TODO
2947 pass
2948
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield entries for a shelfRenderer.

        Prefers delegating to the shelf's own URL; falls back to extracting
        the shelf content inline when no URL is present. With *skip_channels*
        set, shelves that link to other channels are ignored.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
2966
2967 def _playlist_entries(self, video_list_renderer):
2968 for content in video_list_renderer['contents']:
2969 if not isinstance(content, dict):
2970 continue
2971 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2972 if not isinstance(renderer, dict):
2973 continue
2974 video_id = renderer.get('videoId')
2975 if not video_id:
2976 continue
2977 yield self._extract_video(renderer)
2978
2979 r""" # Not needed in the new implementation
2980 def _itemSection_entries(self, item_sect_renderer):
2981 for content in item_sect_renderer['contents']:
2982 if not isinstance(content, dict):
2983 continue
2984 renderer = content.get('videoRenderer', {})
2985 if not isinstance(renderer, dict):
2986 continue
2987 video_id = renderer.get('videoId')
2988 if not video_id:
2989 continue
2990 yield self._extract_video(renderer)
2991 """
2992
2993 def _rich_entries(self, rich_grid_renderer):
2994 renderer = try_get(
2995 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2996 video_id = renderer.get('videoId')
2997 if not video_id:
2998 return
2999 yield self._extract_video(renderer)
3000
3001 def _video_entry(self, video_renderer):
3002 video_id = video_renderer.get('videoId')
3003 if video_id:
3004 return self._extract_video(video_renderer)
3005
3006 def _post_thread_entries(self, post_thread_renderer):
3007 post_renderer = try_get(
3008 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3009 if not post_renderer:
3010 return
3011 # video attachment
3012 video_renderer = try_get(
3013 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3014 video_id = None
3015 if video_renderer:
3016 entry = self._video_entry(video_renderer)
3017 if entry:
3018 yield entry
3019 # inline video links
3020 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3021 for run in runs:
3022 if not isinstance(run, dict):
3023 continue
3024 ep_url = try_get(
3025 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3026 if not ep_url:
3027 continue
3028 if not YoutubeIE.suitable(ep_url):
3029 continue
3030 ep_video_id = YoutubeIE._match_id(ep_url)
3031 if video_id == ep_video_id:
3032 continue
3033 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3034
3035 def _post_thread_continuation_entries(self, post_thread_continuation):
3036 contents = post_thread_continuation.get('contents')
3037 if not isinstance(contents, list):
3038 return
3039 for content in contents:
3040 renderer = content.get('backstagePostThreadRenderer')
3041 if not isinstance(renderer, dict):
3042 continue
3043 for entry in self._post_thread_entries(renderer):
3044 yield entry
3045
3046 @staticmethod
3047 def _build_continuation_query(continuation, ctp=None):
3048 query = {
3049 'ctoken': continuation,
3050 'continuation': continuation,
3051 }
3052 if ctp:
3053 query['itct'] = ctp
3054 return query
3055
3056 @staticmethod
3057 def _extract_next_continuation_data(renderer):
3058 next_continuation = try_get(
3059 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3060 if not next_continuation:
3061 return
3062 continuation = next_continuation.get('continuation')
3063 if not continuation:
3064 return
3065 ctp = next_continuation.get('clickTrackingParams')
3066 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3067
3068 @classmethod
3069 def _extract_continuation(cls, renderer):
3070 next_continuation = cls._extract_next_continuation_data(renderer)
3071 if next_continuation:
3072 return next_continuation
3073 contents = renderer.get('contents')
3074 if not isinstance(contents, list):
3075 return
3076 for content in contents:
3077 if not isinstance(content, dict):
3078 continue
3079 continuation_ep = try_get(
3080 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3081 dict)
3082 if not continuation_ep:
3083 continue
3084 continuation = try_get(
3085 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3086 if not continuation:
3087 continue
3088 ctp = continuation_ep.get('clickTrackingParams')
3089 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3090
    def _entries(self, tab, identity_token):
        """Yield all entries of the selected tab, following continuations.

        *identity_token*, when present, is sent as x-youtube-identity-token so
        authenticated feeds (library, history, ...) can be paged.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: may still be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # Only follow channel shelves when on the Channels tab
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal, so use a one-element list
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Page through continuations until none is returned.
        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Old-style continuation payloads
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # New-style continuation payloads
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3234
3235 @staticmethod
3236 def _extract_selected_tab(tabs):
3237 for tab in tabs:
3238 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3239 return tab['tabRenderer']
3240 else:
3241 raise ExtractorError('Unable to find selected tab')
3242
3243 @staticmethod
3244 def _extract_uploader(data):
3245 uploader = {}
3246 sidebar_renderer = try_get(
3247 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3248 if sidebar_renderer:
3249 for item in sidebar_renderer:
3250 if not isinstance(item, dict):
3251 continue
3252 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3253 if not isinstance(renderer, dict):
3254 continue
3255 owner = try_get(
3256 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3257 if owner:
3258 uploader['uploader'] = owner.get('text')
3259 uploader['uploader_id'] = try_get(
3260 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3261 uploader['uploader_url'] = urljoin(
3262 'https://www.youtube.com/',
3263 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3264 return uploader
3265
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result for the selected tab of a channel/playlist page.

        Title/id/description come from channelMetadataRenderer when this is a
        channel page, overridden by playlistMetadataRenderer for playlist pages,
        with *item_id*-based fallbacks when neither is present.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = title = description = None
        if renderer:
            # Channel page: compose "<channel title> - <tab title>"
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            # Playlist page: its own title wins; it carries no description
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            title = "Youtube " + playlist_id.title()
        playlist = self.playlist_result(
            self._entries(selected_tab, identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist
3295
3296 def _extract_from_playlist(self, item_id, url, data, playlist):
3297 title = playlist.get('title') or try_get(
3298 data, lambda x: x['titleText']['simpleText'], compat_str)
3299 playlist_id = playlist.get('playlistId') or item_id
3300 # Inline playlist rendition continuation does not always work
3301 # at Youtube side, so delegating regular tab-based playlist URL
3302 # processing whenever possible.
3303 playlist_url = urljoin(url, try_get(
3304 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3305 compat_str))
3306 if playlist_url and playlist_url != url:
3307 return self.url_result(
3308 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3309 video_title=title)
3310 return self.playlist_result(
3311 self._playlist_entries(playlist), playlist_id=playlist_id,
3312 playlist_title=title)
3313
3314 @staticmethod
3315 def _extract_alerts(data):
3316 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3317 if not isinstance(alert_dict, dict):
3318 continue
3319 for renderer in alert_dict:
3320 alert = alert_dict[renderer]
3321 alert_type = alert.get('type')
3322 if not alert_type:
3323 continue
3324 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3325 if message:
3326 yield alert_type, message
3327 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3328 message = try_get(run, lambda x: x['text'], compat_str)
3329 if message:
3330 yield alert_type, message
3331
3332 def _extract_identity_token(self, webpage, item_id):
3333 ytcfg = self._extract_ytcfg(item_id, webpage)
3334 if ytcfg:
3335 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3336 if token:
3337 return token
3338 return self._search_regex(
3339 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3340 'identity token', default=None)
3341
    def _real_extract(self, url):
        """Dispatch a tab/channel/playlist URL to the right extraction path.

        Order of attempts: channel "home page" rewrite, watch-URL fallback to
        playlist, --no-playlist handling, then tabs -> playlist -> single
        video from the downloaded initial data.
        """
        item_id = self._match_id(url)
        # Normalize the host so subsequent requests hit www.youtube.com.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # 'pre'/'post' split the URL at the end of _VALID_URL; 'not_channel'
        # is a named group defined in _VALID_URL (outside this view) —
        # presumably non-None for watch/feed-style URLs. TODO confirm.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            # Rewrite the bare channel URL to its /videos tab.
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # A watch URL without a video id can still be salvaged if it names
        # a playlist; otherwise it is unrecognizable.
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            # --no-playlist: short-circuit to the single video.
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface any alerts (e.g. unavailable/private notices) as warnings.
        for alert_type, alert_message in self._extract_alerts(data):
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3393
3394
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for any URL it already handles.
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE via a canonical playlist URL, keeping
        the original query string (or synthesizing one from the bare id)."""
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', query),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3469
3470
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be short link carrying a list= parameter to the
        equivalent full watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3509
3510
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the ytuser:<name> shorthand onto the canonical /user/ URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/' + user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3524
3525
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the :ytfav shorthand to the liked-videos (LL) playlist."""
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())
3543
3544
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent results for *query* by paging
        through the youtubei/v1/search API with continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for index, isr in enumerate(slr_contents):
                if not isr_contents:
                    # try_get may return None when this entry is not an
                    # itemSectionRenderer; coerce to [] so the scan below
                    # does not raise TypeError on a missed lookup.
                    isr_contents = try_get(
                        slr_contents,
                        (lambda x: x[index]['itemSectionRenderer']['contents']),
                        list) or []
                    for content in isr_contents:
                        if content.get('videoRenderer') is not None:
                            break
                    else:
                        # Section held only promoted/non-video items.
                        isr_contents = []

                if continuation_token is None:
                    continuation_token = try_get(
                        slr_contents,
                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
                            'token'],
                        compat_str)
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            if not continuation_token:
                break
            # Request the next page on the following iteration.
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3652
3653
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same extraction as YoutubeSearchIE, only with a search filter applied.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Protobuf-encoded search parameter selecting sort-by-upload-date.
    _SEARCH_PARAMS = 'CAI%3D'
3659
3660
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # This variant matches a real results URL, not a search-key prefix.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search from a /results URL, honoring its sp= filter."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3686
3687
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the name from the subclass's feed, e.g. youtube:history.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Delegate extraction of the feed page to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3708
3709
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve :ytwatchlater to the WL playlist handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3722
3723
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as :ytrec(ommended).
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Fetched from /feed/recommended by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3738
3739
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Fetched from /feed/subscriptions by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3751
3752
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    # Fetched from /feed/history by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3761
3762
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose v= parameter was cut off (usually
    by an unquoted '&' in the shell) and fail with a helpful message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch URLs that carry only secondary parameters (feature=,
    # annotation_id=, hl=, t=, x-yt-cl=) but no video id, and bare
    # attribution_link URLs.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a matching URL cannot contain a video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)
3810
3811
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fail loudly on a watch URL whose video id is shorter than the
        11 characters the _VALID_URL pattern requires of a full id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)
3827
3828
# Do Youtube show urls even exist anymore? I couldn't find any
# NOTE: the extractor below is deliberately kept as a raw string (dead code)
# so it can be revived if /show/ URLs reappear; it is never executed.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''