jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[YoutubeDL] Ensure dir existence for each requested format (closes #14116)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
8d81f3e3 19 compat_kwargs,
c5e8d7af 20 compat_parse_qs,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
c5e8d7af 26 compat_str,
4bb4a188
PH
27)
28from ..utils import (
c5e8d7af 29 clean_html,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
0cb58b02 40 remove_quotes,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
c93d53f5 43 str_to_int,
556dbe7f 44 try_get,
c5e8d7af
PH
45 unescapeHTML,
46 unified_strdate,
cf7e015f 47 unsmuggle_url,
81c2f20b 48 uppercase_escape,
6e6bc8da 49 urlencode_postdata,
c5e8d7af
PH
50)
51
5f6a1245 52
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie (containing hl=en) for .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one url_result for the 'Youtube' extractor per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (rather than None) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST f_req (JSON-encoded as 'f.req') together with the login form fields.

            Returns whatever _download_json returns for a non-fatal request;
            the callers below treat False as failure.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' so that the
                # remainder can be parsed as a JSON array
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same continue URL is embedded in both the lookup and the
        # challenge request payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: '%' binds tighter than the conditional
            # expression, so without the parentheses the fallback branch
            # would lose the 'Unable to login' prefix
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        tfa = try_get(res, lambda x: x[0][0], list)
        if tfa:
            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
            if tfa_str != 'TWO_STEP_VERIFICATION':
                # Only the two-step verification challenge is handled here;
                # bail out instead of falling through with no CheckCookie URL
                warn('Unable to login: unsupported challenge %s' % tfa_str)
                return False

            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(tfa, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                    ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Parenthesised for the same precedence reason as above
                warn(
                    'Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage(self, *args, **kwargs):
        """Download a webpage with disable_polymer=true added to the query.

        All arguments are forwarded to the parent implementation.
        """
        # Work on a copy so that a query dict supplied by the caller is not
        # modified; an explicit query=None is treated as an empty query
        query = dict(kwargs.get('query') or {})
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in, unless there is no downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The result is intentionally ignored: a failed login does not
        # abort initialization
        self._login()
c5e8d7af 260
8377574c 261
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for extractors whose listing pages continue through a "Load more" button."""

    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up page.

        Entries come from self._process_page(). The next page is located
        through the data-uix-load-more-href attribute of the current
        "Load more" widget and fetched as JSON; iteration ends when no such
        attribute is found or the fetched content is blank.
        """
        widget_html = page
        chunk_html = page
        page_num = 1
        while True:
            for item in self._process_page(chunk_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            response = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            chunk_html = response['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
            page_num += 1
284
061a75ed
S
285
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for playlist-like extractors; videos are matched with self._VIDEO_RE."""

    def _process_page(self, content):
        """Yield a url_result for the 'Youtube' extractor for each video found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs for the videos matched in page.

        Ids are de-duplicated and kept in order of first appearance. When an
        id is matched several times, the first non-empty title is kept.
        """
        ids_in_page = []
        titles_in_page = []
        # video id -> position in the two lists above, so that duplicates are
        # detected without rescanning ids_in_page for every match
        position_by_id = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            video_id = mobj.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the guard compares the 'id' group, not 'index',
            # with '0' - confirm intent
            if 'index' in mobj.groupdict() and video_id == '0':
                continue
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_by_id.get(video_id)
            if idx is None:
                position_by_id[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)
310
311
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for extractors of pages that list playlists."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist link in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist result of its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
325
326
360e1ca5 327class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 328 IE_DESC = 'YouTube.com'
cb7dfeea 329 _VALID_URL = r"""(?x)^
c5e8d7af 330 (
edb53e2d 331 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 332 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 333 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 334 (?:www\.)?pwnyoutube\.com/|
f7000f3a 335 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
336 tube\.majestyc\.net/|
337 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
338 (?:.*?\#/)? # handle anchor (#/) redirect urls
339 (?: # the various things that can precede the ID:
ac7553d0 340 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 341 |(?: # or the v= param in all its forms
f7000f3a 342 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 343 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 344 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
345 v=
346 )
f4b05232 347 ))
cbaed4bb
S
348 |(?:
349 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
350 vid\.plus| # or vid.plus/xxxx
351 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 352 )/
edb53e2d 353 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 354 )
c5e8d7af 355 )? # all until now is optional -> you can pass the naked ID
8963d9c2 356 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
357 (?!.*?\blist=
358 (?:
359 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
360 WL # WL are handled by the watch later IE
361 )
362 )
c5e8d7af 363 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 364 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 365 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 366 _formats = {
c2d3cb4c 367 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
368 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
369 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
370 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
371 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
372 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
373 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
374 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 375 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 376 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
377 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
378 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
379 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
380 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
381 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 382 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 383 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
384 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 385
386
387 # 3D videos
c2d3cb4c 388 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
389 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
390 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
391 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 392 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
393 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
394 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 395
96fb5605 396 # Apple HTTP Live Streaming
11f12195 397 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 398 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
399 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
400 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
401 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
402 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 403 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
404 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
405
406 # DASH mp4 video
d23028a8
S
407 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
408 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
409 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
410 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
411 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
412 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
413 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
414 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
415 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
416 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
417 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
418 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 419
f6f1fc92 420 # Dash mp4 audio
d23028a8
S
421 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
422 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
423 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
424 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
425 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
426 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
427 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
428
429 # Dash webm
d23028a8
S
430 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
431 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
432 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
433 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
434 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
435 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
436 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
437 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
438 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
439 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
440 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
441 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
442 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
443 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
444 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 445 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
446 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
447 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
448 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
449 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
450 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
451 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
452
453 # Dash webm audio
d23028a8
S
454 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
455 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 456
0857baad 457 # Dash webm audio with opus inside
d23028a8
S
458 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
459 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
460 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 461
ce6b9a2d
PH
462 # RTMP (unnamed)
463 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 464 }
23d17e4b 465 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 466
fd5c4aab
S
467 _GEO_BYPASS = False
468
78caa52a 469 IE_NAME = 'youtube'
2eb88d95
PH
470 _TESTS = [
471 {
2d3d2997 472 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
473 'info_dict': {
474 'id': 'BaW_jenozKc',
475 'ext': 'mp4',
476 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
477 'uploader': 'Philipp Hagemeister',
478 'uploader_id': 'phihag',
ec85ded8 479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 480 'upload_date': '20121002',
7caf9830 481 'license': 'Standard YouTube License',
4bc3a23e
PH
482 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
483 'categories': ['Science & Technology'],
000b6b5a 484 'tags': ['youtube-dl'],
556dbe7f 485 'duration': 10,
3e7c1224
PH
486 'like_count': int,
487 'dislike_count': int,
7c80519c 488 'start_time': 1,
297a564b 489 'end_time': 9,
2eb88d95 490 }
0e853ca4 491 },
0e853ca4 492 {
2d3d2997 493 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
494 'note': 'Test generic use_cipher_signature video (#897)',
495 'info_dict': {
496 'id': 'UxxajLWwzqY',
497 'ext': 'mp4',
498 'upload_date': '20120506',
499 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 500 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 501 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
502 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
503 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
504 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 505 'duration': 180,
4bc3a23e
PH
506 'uploader': 'Icona Pop',
507 'uploader_id': 'IconaPop',
ec85ded8 508 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 509 'license': 'Standard YouTube License',
0cb58b02 510 'creator': 'Icona Pop',
2eb88d95 511 }
c108eb73
JMF
512 },
513 {
4bc3a23e
PH
514 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
515 'note': 'Test VEVO video with age protection (#956)',
516 'info_dict': {
517 'id': '07FYdnEawAQ',
518 'ext': 'mp4',
519 'upload_date': '20130703',
520 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 521 'alt_title': 'Tunnel Vision',
4bc3a23e 522 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 523 'duration': 419,
4bc3a23e
PH
524 'uploader': 'justintimberlakeVEVO',
525 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 526 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 527 'license': 'Standard YouTube License',
0cb58b02 528 'creator': 'Justin Timberlake',
34952f09 529 'age_limit': 18,
c108eb73
JMF
530 }
531 },
fccd3771 532 {
4bc3a23e
PH
533 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
534 'note': 'Embed-only video (#1746)',
535 'info_dict': {
536 'id': 'yZIXLfi8CZQ',
537 'ext': 'mp4',
538 'upload_date': '20120608',
539 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
540 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
541 'uploader': 'SET India',
94bfcd23 542 'uploader_id': 'setindia',
ec85ded8 543 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 544 'license': 'Standard YouTube License',
94bfcd23 545 'age_limit': 18,
fccd3771
PH
546 }
547 },
11b56058 548 {
2d3d2997 549 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
550 'note': 'Use the first video ID in the URL',
551 'info_dict': {
552 'id': 'BaW_jenozKc',
553 'ext': 'mp4',
554 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
555 'uploader': 'Philipp Hagemeister',
556 'uploader_id': 'phihag',
ec85ded8 557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 558 'upload_date': '20121002',
7caf9830 559 'license': 'Standard YouTube License',
11b56058
PM
560 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
561 'categories': ['Science & Technology'],
562 'tags': ['youtube-dl'],
556dbe7f 563 'duration': 10,
11b56058
PM
564 'like_count': int,
565 'dislike_count': int,
34a7de29
S
566 },
567 'params': {
568 'skip_download': True,
569 },
11b56058 570 },
dd27fd17 571 {
2d3d2997 572 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
573 'note': '256k DASH audio (format 141) via DASH manifest',
574 'info_dict': {
575 'id': 'a9LDPn-MO4I',
576 'ext': 'm4a',
577 'upload_date': '20121002',
578 'uploader_id': '8KVIDEO',
ec85ded8 579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
580 'description': '',
581 'uploader': '8KVIDEO',
7caf9830 582 'license': 'Standard YouTube License',
4bc3a23e 583 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 584 },
4bc3a23e
PH
585 'params': {
586 'youtube_include_dash_manifest': True,
587 'format': '141',
4919603f 588 },
de3c7fe0 589 'skip': 'format 141 not served anymore',
dd27fd17 590 },
3489b7d2
JMF
591 # DASH manifest with encrypted signature
592 {
78caa52a
PH
593 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
594 'info_dict': {
595 'id': 'IB3lcPjvWLA',
596 'ext': 'm4a',
b766eb27
JMF
597 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
598 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
556dbe7f 599 'duration': 244,
78caa52a
PH
600 'uploader': 'AfrojackVEVO',
601 'uploader_id': 'AfrojackVEVO',
602 'upload_date': '20131011',
7caf9830 603 'license': 'Standard YouTube License',
3489b7d2 604 },
4bc3a23e 605 'params': {
78caa52a 606 'youtube_include_dash_manifest': True,
de3c7fe0 607 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
608 },
609 },
aaeb86f6
S
610 # JS player signature function name containing $
611 {
612 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
613 'info_dict': {
614 'id': 'nfWlot6h_JM',
615 'ext': 'm4a',
616 'title': 'Taylor Swift - Shake It Off',
0cb58b02 617 'alt_title': 'Shake It Off',
f57b7835 618 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 619 'duration': 242,
aaeb86f6
S
620 'uploader': 'TaylorSwiftVEVO',
621 'uploader_id': 'TaylorSwiftVEVO',
622 'upload_date': '20140818',
7caf9830 623 'license': 'Standard YouTube License',
0cb58b02 624 'creator': 'Taylor Swift',
aaeb86f6
S
625 },
626 'params': {
627 'youtube_include_dash_manifest': True,
de3c7fe0 628 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
629 },
630 },
aa79ac0c
PH
631 # Controversy video
632 {
633 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
634 'info_dict': {
635 'id': 'T4XJQO3qol8',
636 'ext': 'mp4',
556dbe7f 637 'duration': 219,
aa79ac0c
PH
638 'upload_date': '20100909',
639 'uploader': 'The Amazing Atheist',
640 'uploader_id': 'TheAmazingAtheist',
ec85ded8 641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 642 'license': 'Standard YouTube License',
aa79ac0c
PH
643 'title': 'Burning Everyone\'s Koran',
644 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
645 }
c522adb1
JMF
646 },
647 # Normal age-gate video (No vevo, embed allowed)
648 {
2d3d2997 649 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
650 'info_dict': {
651 'id': 'HtVdAasjOgU',
652 'ext': 'mp4',
653 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 654 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 655 'duration': 142,
c522adb1
JMF
656 'uploader': 'The Witcher',
657 'uploader_id': 'WitcherGame',
ec85ded8 658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 659 'upload_date': '20140605',
7caf9830 660 'license': 'Standard YouTube License',
34952f09 661 'age_limit': 18,
c522adb1
JMF
662 },
663 },
fccae2b9
S
664 # Age-gate video with encrypted signature
665 {
2d3d2997 666 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
667 'info_dict': {
668 'id': '6kLq3WMV1nU',
669 'ext': 'mp4',
670 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
671 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
556dbe7f 672 'duration': 247,
fccae2b9
S
673 'uploader': 'LloydVEVO',
674 'uploader_id': 'LloydVEVO',
ec85ded8 675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 676 'upload_date': '20110629',
7caf9830 677 'license': 'Standard YouTube License',
34952f09 678 'age_limit': 18,
fccae2b9
S
679 },
680 },
774e208f 681 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
7d02dcfa 682 # YouTube Red ad is not captured for creator
774e208f
PH
683 {
684 'url': '__2ABJjxzNo',
685 'info_dict': {
686 'id': '__2ABJjxzNo',
687 'ext': 'mp4',
556dbe7f 688 'duration': 266,
774e208f
PH
689 'upload_date': '20100430',
690 'uploader_id': 'deadmau5',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 692 'creator': 'deadmau5',
774e208f
PH
693 'description': 'md5:12c56784b8032162bb936a5f76d55360',
694 'uploader': 'deadmau5',
7caf9830 695 'license': 'Standard YouTube License',
774e208f 696 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 697 'alt_title': 'Some Chords',
774e208f
PH
698 },
699 'expected_warnings': [
700 'DASH manifest missing',
701 ]
e52a40ab
PH
702 },
703 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
704 {
705 'url': 'lqQg6PlCWgI',
706 'info_dict': {
707 'id': 'lqQg6PlCWgI',
708 'ext': 'mp4',
556dbe7f 709 'duration': 6085,
90227264 710 'upload_date': '20150827',
cbe2bd91 711 'uploader_id': 'olympic',
ec85ded8 712 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 713 'license': 'Standard YouTube License',
cbe2bd91 714 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 715 'uploader': 'Olympic',
cbe2bd91
PH
716 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
717 },
718 'params': {
719 'skip_download': 'requires avconv',
e52a40ab 720 }
cbe2bd91 721 },
6271f1ca
PH
722 # Non-square pixels
723 {
724 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
725 'info_dict': {
726 'id': '_b-2C3KPAM0',
727 'ext': 'mp4',
728 'stretched_ratio': 16 / 9.,
556dbe7f 729 'duration': 85,
6271f1ca
PH
730 'upload_date': '20110310',
731 'uploader_id': 'AllenMeow',
ec85ded8 732 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
733 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
734 'uploader': '孫艾倫',
7caf9830 735 'license': 'Standard YouTube License',
6271f1ca
PH
736 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
737 },
06b491eb
S
738 },
739 # url_encoded_fmt_stream_map is empty string
740 {
741 'url': 'qEJwOuvDf7I',
742 'info_dict': {
743 'id': 'qEJwOuvDf7I',
f57b7835 744 'ext': 'webm',
06b491eb
S
745 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
746 'description': '',
747 'upload_date': '20150404',
748 'uploader_id': 'spbelect',
749 'uploader': 'Наблюдатели Петербурга',
750 },
751 'params': {
752 'skip_download': 'requires avconv',
e323cf3f
S
753 },
754 'skip': 'This live event has ended.',
06b491eb 755 },
da77d856
S
756 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
757 {
758 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
759 'info_dict': {
760 'id': 'FIl7x6_3R5Y',
761 'ext': 'mp4',
762 'title': 'md5:7b81415841e02ecd4313668cde88737a',
763 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 764 'duration': 220,
da77d856
S
765 'upload_date': '20150625',
766 'uploader_id': 'dorappi2000',
ec85ded8 767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 768 'uploader': 'dorappi2000',
7caf9830 769 'license': 'Standard YouTube License',
be49068d 770 'formats': 'mincount:32',
da77d856 771 },
2ee8f5d8 772 },
8a1a26ce
YCH
773 # DASH manifest with segment_list
774 {
775 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
776 'md5': '8ce563a1d667b599d21064e982ab9e31',
777 'info_dict': {
778 'id': 'CsmdDsKjzN8',
779 'ext': 'mp4',
17ee98e1 780 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
781 'uploader': 'Airtek',
782 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
783 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 784 'license': 'Standard YouTube License',
8a1a26ce
YCH
785 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
786 },
787 'params': {
788 'youtube_include_dash_manifest': True,
789 'format': '135', # bestvideo
be49068d
S
790 },
791 'skip': 'This live event has ended.',
2ee8f5d8 792 },
cf7e015f
S
793 {
794 # Multifeed videos (multiple cameras), URL is for Main Camera
795 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
796 'info_dict': {
797 'id': 'jqWvoWXjCVs',
798 'title': 'teamPGP: Rocket League Noob Stream',
799 'description': 'md5:dc7872fb300e143831327f1bae3af010',
800 },
801 'playlist': [{
802 'info_dict': {
803 'id': 'jqWvoWXjCVs',
804 'ext': 'mp4',
805 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
806 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 807 'duration': 7335,
cf7e015f
S
808 'upload_date': '20150721',
809 'uploader': 'Beer Games Beer',
810 'uploader_id': 'beergamesbeer',
ec85ded8 811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 812 'license': 'Standard YouTube License',
cf7e015f
S
813 },
814 }, {
815 'info_dict': {
816 'id': '6h8e8xoXJzg',
817 'ext': 'mp4',
818 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
819 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 820 'duration': 7337,
cf7e015f
S
821 'upload_date': '20150721',
822 'uploader': 'Beer Games Beer',
823 'uploader_id': 'beergamesbeer',
ec85ded8 824 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 825 'license': 'Standard YouTube License',
cf7e015f
S
826 },
827 }, {
828 'info_dict': {
829 'id': 'PUOgX5z9xZw',
830 'ext': 'mp4',
831 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
832 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 833 'duration': 7337,
cf7e015f
S
834 'upload_date': '20150721',
835 'uploader': 'Beer Games Beer',
836 'uploader_id': 'beergamesbeer',
ec85ded8 837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 838 'license': 'Standard YouTube License',
cf7e015f
S
839 },
840 }, {
841 'info_dict': {
842 'id': 'teuwxikvS5k',
843 'ext': 'mp4',
844 'title': 'teamPGP: Rocket League Noob Stream (zim)',
845 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 846 'duration': 7334,
cf7e015f
S
847 'upload_date': '20150721',
848 'uploader': 'Beer Games Beer',
849 'uploader_id': 'beergamesbeer',
ec85ded8 850 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 851 'license': 'Standard YouTube License',
cf7e015f
S
852 },
853 }],
854 'params': {
855 'skip_download': True,
856 },
cbaed4bb 857 },
f9f49d87
S
858 {
859 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
860 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
861 'info_dict': {
862 'id': 'gVfLd0zydlo',
863 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
864 },
865 'playlist_count': 2,
be49068d 866 'skip': 'Not multifeed anymore',
f9f49d87 867 },
cbaed4bb 868 {
2d3d2997 869 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 870 'only_matching': True,
0e49d9a6 871 },
6d4fc66b 872 {
2d3d2997 873 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
874 'only_matching': True,
875 },
0e49d9a6 876 {
61f92af1 877 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
878 # Also tests cut-off URL expansion in video description (see
879 # https://github.com/rg3/youtube-dl/issues/1892,
880 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
881 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
882 'info_dict': {
883 'id': 'lsguqyKfVQg',
884 'ext': 'mp4',
885 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 886 'alt_title': 'Dark Walk',
0e49d9a6 887 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 888 'duration': 133,
0e49d9a6
LL
889 'upload_date': '20151119',
890 'uploader_id': 'IronSoulElf',
ec85ded8 891 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 892 'uploader': 'IronSoulElf',
7caf9830 893 'license': 'Standard YouTube License',
0cb58b02 894 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
895 },
896 'params': {
897 'skip_download': True,
898 },
899 },
61f92af1
S
900 {
901 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
902 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
903 'only_matching': True,
904 },
313dfc45
LL
905 {
906 # Video with yt:stretch=17:0
907 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
908 'info_dict': {
909 'id': 'Q39EVAstoRM',
910 'ext': 'mp4',
911 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
912 'description': 'md5:ee18a25c350637c8faff806845bddee9',
913 'upload_date': '20151107',
914 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
915 'uploader': 'CH GAMER DROID',
916 },
917 'params': {
918 'skip_download': True,
919 },
be49068d 920 'skip': 'This video does not exist.',
313dfc45 921 },
7caf9830
S
922 {
923 # Video licensed under Creative Commons
924 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
925 'info_dict': {
926 'id': 'M4gD1WSo5mA',
927 'ext': 'mp4',
928 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
929 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 930 'duration': 721,
7caf9830
S
931 'upload_date': '20150127',
932 'uploader_id': 'BerkmanCenter',
ec85ded8 933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 934 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
935 'license': 'Creative Commons Attribution license (reuse allowed)',
936 },
937 'params': {
938 'skip_download': True,
939 },
940 },
fd050249
S
941 {
942 # Channel-like uploader_url
943 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
944 'info_dict': {
945 'id': 'eQcmzGIKrzg',
946 'ext': 'mp4',
947 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
948 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 949 'duration': 4060,
fd050249
S
950 'upload_date': '20151119',
951 'uploader': 'Bernie 2016',
952 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
954 'license': 'Creative Commons Attribution license (reuse allowed)',
955 },
956 'params': {
957 'skip_download': True,
958 },
959 },
040ac686
S
960 {
961 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
962 'only_matching': True,
7f29cf54
S
963 },
964 {
965 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
966 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
967 'only_matching': True,
6496ccb4
S
968 },
969 {
970 # Rental video preview
971 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
972 'info_dict': {
973 'id': 'uGpuVWrhIzE',
974 'ext': 'mp4',
975 'title': 'Piku - Trailer',
976 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
977 'upload_date': '20150811',
978 'uploader': 'FlixMatrix',
979 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
981 'license': 'Standard YouTube License',
982 },
983 'params': {
984 'skip_download': True,
985 },
022a5d66 986 },
12afdc2a
S
987 {
988 # YouTube Red video with episode data
989 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
990 'info_dict': {
991 'id': 'iqKdEhx-dD4',
992 'ext': 'mp4',
993 'title': 'Isolation - Mind Field (Ep 1)',
556dbe7f
S
994 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
995 'duration': 2085,
12afdc2a
S
996 'upload_date': '20170118',
997 'uploader': 'Vsauce',
998 'uploader_id': 'Vsauce',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1000 'license': 'Standard YouTube License',
1001 'series': 'Mind Field',
1002 'season_number': 1,
1003 'episode_number': 1,
1004 },
1005 'params': {
1006 'skip_download': True,
1007 },
1008 'expected_warnings': [
1009 'Skipping DASH manifest',
1010 ],
1011 },
c7121fa7
S
1012 {
1013 # The following content has been identified by the YouTube community
1014 # as inappropriate or offensive to some audiences.
1015 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1016 'info_dict': {
1017 'id': '6SJNVb0GnPI',
1018 'ext': 'mp4',
1019 'title': 'Race Differences in Intelligence',
1020 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1021 'duration': 965,
1022 'upload_date': '20140124',
1023 'uploader': 'New Century Foundation',
1024 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1026 'license': 'Standard YouTube License',
1027 'view_count': int,
1028 },
1029 'params': {
1030 'skip_download': True,
1031 },
1032 },
022a5d66
S
1033 {
1034 # itag 212
1035 'url': '1t24XAntNCY',
1036 'only_matching': True,
fd5c4aab
S
1037 },
1038 {
1039 # geo restricted to JP
1040 'url': 'sJL6WA-aGkQ',
1041 'only_matching': True,
1042 },
d0ba5587
S
1043 {
1044 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1045 'only_matching': True,
1046 },
2eb88d95
PH
1047 ]
1048
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature layout id) -> deciphering callable;
    # populated on demand by _decrypt_signature.
    self._player_cache = {}
e0df6211 1052
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1056
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1060
def report_unavailable_format(self, video_id, format):
    """Tell the user that the requested format is not available for the video."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1064
def report_rtmp_download(self):
    """Tell the user that an RTMP download was detected."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1068
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the layout id of a signature.

    The id is the length of every dot-separated part of example_sig,
    joined with dots (e.g. a signature with parts of 40 and 43 characters
    yields '40.43').
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1072
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The player id and type (the file extension) are parsed from
    player_url. If an index list for this player and signature layout is
    present in the 'youtube-sigfuncs' cache it is used directly; otherwise
    the player is downloaded, parsed, and the resulting character
    permutation is stored in the cache before the parsed function is
    returned.

    Raises ExtractorError when the player cannot be identified from its
    URL or when its type is neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Sanity invariant: the cache key must be a plain file name.
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # cache_spec[k] is the index of the input character that ends up
        # at position k of the deciphered signature.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raise explicitly: an ``assert False`` would be stripped under
        # ``python -O`` and execution would continue with ``res`` unbound.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a string whose k-th character has code point k,
    # so the output directly encodes the permutation as a list of indices.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1118
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the deciphering function func.

    func is applied to a probe string whose k-th character has code point
    k; the resulting index sequence is rendered as a concatenation of
    ``s[i]`` items and ``s[a:b:c]`` slices and written with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            stop = end + step
            start_part = str(start) if start != 0 else ''
            stop_part = ':' if stop < 0 else (':%d' % stop)
            step_part = (':%d' % step) if step != 1 else ''
            return 's[%s%s%s]' % (start_part, stop_part, step_part)

        run_step = None
        # Placeholder to keep pyflakes quiet - run_start is always assigned
        # together with run_step before it is read
        run_start = '(Never used)'
        for prev, cur in zip(idxs[:-1], idxs[1:]):
            delta = cur - prev
            if run_step is None:
                if delta in [-1, 1]:
                    # A run of consecutive indices starts at prev
                    run_step = delta
                    run_start = prev
                else:
                    yield 's[%d]' % prev
            elif delta != run_step:
                # The current run ended at prev
                yield _genslice(run_start, prev, run_step)
                run_step = None
        if run_step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(run_start, cur, run_step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    template = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n')
    code = template % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1157
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in a JS player.

    The function name is located with _search_regex and the function itself
    is extracted with JSInterpreter; the returned callable passes its single
    argument to it as a one-element argument list.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def decipher(s):
        return initial_function([s])

    return decipher
1167
def _parse_sig_swf(self, file_contents):
    """Return a callable wrapping the 'decipher' function of an SWF player.

    The 'SignatureDecipher' class is extracted with SWFInterpreter; the
    returned callable passes its single argument to the class's 'decipher'
    function as a one-element argument list.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        return initial_function([s])

    return decipher
1174
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature

    player_url is normalised to an absolute URL, the deciphering function
    for that player and signature layout is looked up in (or added to)
    self._player_cache, and the function is applied to s. Any failure is
    re-raised as ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        # Protocol-relative URL
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        # Site-relative URL
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1201
def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to a list of subtitle formats.

    The track list is fetched from the timedtext listing endpoint. For
    every language (first track wins) one entry per extension in
    self._SUBTITLE_FORMATS is produced, each with 'url' and 'ext' keys.
    An empty dict is returned, after a warning, when the listing cannot be
    downloaded or contains no tracks.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Only the first track of each language is kept
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1233
a72778d3
S
1234 def _get_ytplayer_config(self, video_id, webpage):
1235 patterns = (
526b3b07
S
1236 # User data may contain arbitrary character sequences that may affect
1237 # JSON extraction with regex, e.g. when '};' is contained the second
1238 # regex won't capture the whole JSON. Yet working around by trying more
1239 # concrete regex first keeping in mind proper quoted string handling
1240 # to be implemented in future that will replace this workaround (see
1241 # https://github.com/rg3/youtube-dl/issues/7468,
1242 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1243 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1244 r';ytplayer\.config\s*=\s*({.+?});',
1245 )
1246 config = self._search_regex(
1247 patterns, webpage, 'ytplayer.config', default=None)
1248 if config:
1249 return self._parse_json(
1250 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1251
360e1ca5 1252 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1253 """We need the webpage for getting the captions url, pass it as an
1254 argument to speed up the process."""
69ea8ca4 1255 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1256 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1257 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1258 if not player_config:
de7f3446
JMF
1259 self._downloader.report_warning(err_msg)
1260 return {}
de7f3446 1261 try:
0792d563 1262 args = player_config['args']
b78b292f
S
1263 caption_url = args.get('ttsurl')
1264 if caption_url:
1265 timestamp = args['timestamp']
1266 # We get the available subtitles
15707c7e 1267 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1268 'type': 'list',
1269 'tlangs': 1,
1270 'asrs': 1,
1271 })
1272 list_url = caption_url + '&' + list_params
1273 caption_list = self._download_xml(list_url, video_id)
1274 original_lang_node = caption_list.find('track')
1275 if original_lang_node is None:
1276 self._downloader.report_warning('Video doesn\'t have automatic captions')
1277 return {}
1278 original_lang = original_lang_node.attrib['lang_code']
1279 caption_kind = original_lang_node.attrib.get('kind', '')
1280
1281 sub_lang_list = {}
1282 for lang_node in caption_list.findall('target'):
1283 sub_lang = lang_node.attrib['lang_code']
1284 sub_formats = []
1285 for ext in self._SUBTITLE_FORMATS:
15707c7e 1286 params = compat_urllib_parse_urlencode({
b78b292f
S
1287 'lang': original_lang,
1288 'tlang': sub_lang,
1289 'fmt': ext,
1290 'ts': timestamp,
1291 'kind': caption_kind,
1292 })
1293 sub_formats.append({
1294 'url': caption_url + '&' + params,
1295 'ext': ext,
1296 })
1297 sub_lang_list[sub_lang] = sub_formats
1298 return sub_lang_list
1299
ddbb4c5c
S
1300 def make_captions(sub_url, sub_langs):
1301 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1302 caption_qs = compat_parse_qs(parsed_sub_url.query)
1303 captions = {}
1304 for sub_lang in sub_langs:
1305 sub_formats = []
1306 for ext in self._SUBTITLE_FORMATS:
1307 caption_qs.update({
1308 'tlang': [sub_lang],
1309 'fmt': [ext],
1310 })
1311 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1312 query=compat_urllib_parse_urlencode(caption_qs, True)))
1313 sub_formats.append({
1314 'url': sub_url,
1315 'ext': ext,
1316 })
1317 captions[sub_lang] = sub_formats
1318 return captions
1319
1320 # New captions format as of 22.06.2017
1321 player_response = args.get('player_response')
1322 if player_response and isinstance(player_response, compat_str):
1323 player_response = self._parse_json(
1324 player_response, video_id, fatal=False)
1325 if player_response:
1326 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1327 base_url = renderer['captionTracks'][0]['baseUrl']
1328 sub_lang_list = []
1329 for lang in renderer['translationLanguages']:
1330 lang_code = lang.get('languageCode')
1331 if lang_code:
1332 sub_lang_list.append(lang_code)
1333 return make_captions(base_url, sub_lang_list)
1334
b78b292f
S
1335 # Some videos don't provide ttsurl but rather caption_tracks and
1336 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1337 # Does not used anymore as of 22.06.2017
b78b292f
S
1338 caption_tracks = args['caption_tracks']
1339 caption_translation_languages = args['caption_translation_languages']
1340 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1341 sub_lang_list = []
b78b292f
S
1342 for lang in caption_translation_languages.split(','):
1343 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1344 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1345 if sub_lang:
1346 sub_lang_list.append(sub_lang)
1347 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1348 # An extractor error can be raise by the download process if there are
1349 # no automatic captions but there are subtitles
ddbb4c5c 1350 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1351 self._downloader.report_warning(err_msg)
1352 return {}
1353
d77ab8e2
S
1354 def _mark_watched(self, video_id, video_info):
1355 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1356 if not playback_url:
1357 return
1358 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1359 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1360
1361 # cpn generation algorithm is reverse engineered from base.js.
1362 # In fact it works even with dummy cpn.
1363 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1364 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1365
1366 qs.update({
1367 'ver': ['2'],
1368 'cpn': [cpn],
1369 })
1370 playback_url = compat_urlparse.urlunparse(
15707c7e 1371 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1372
1373 self._download_webpage(
1374 playback_url, video_id, 'Marking watched',
1375 'Unable to mark watched', fatal=False)
1376
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id (second group of cls._VALID_URL) found in url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1384
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the annotations document for video_id."""
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1fb07d10 1388
9cafc3fd
S
1389 @staticmethod
1390 def _extract_chapters(description, duration):
1391 if not description:
1392 return None
1393 chapter_lines = re.findall(
1394 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1395 description)
1396 if not chapter_lines:
1397 return None
1398 chapters = []
1399 for next_num, (chapter_line, time_point) in enumerate(
1400 chapter_lines, start=1):
1401 start_time = parse_duration(time_point)
1402 if start_time is None:
1403 continue
39d4c1be
S
1404 if start_time > duration:
1405 break
9cafc3fd
S
1406 end_time = (duration if next_num == len(chapter_lines)
1407 else parse_duration(chapter_lines[next_num][1]))
1408 if end_time is None:
1409 continue
39d4c1be
S
1410 if end_time > duration:
1411 end_time = duration
1412 if start_time > end_time:
1413 break
9cafc3fd
S
1414 chapter_title = re.sub(
1415 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1416 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1417 chapters.append({
1418 'start_time': start_time,
1419 'end_time': end_time,
1420 'title': chapter_title,
1421 })
1422 return chapters
1423
c5e8d7af 1424 def _real_extract(self, url):
cf7e015f
S
1425 url, smuggled_data = unsmuggle_url(url, {})
1426
7e8c0af0 1427 proto = (
78caa52a
PH
1428 'http' if self._downloader.params.get('prefer_insecure', False)
1429 else 'https')
7e8c0af0 1430
7c80519c 1431 start_time = None
297a564b 1432 end_time = None
7c80519c
JMF
1433 parsed_url = compat_urllib_parse_urlparse(url)
1434 for component in [parsed_url.fragment, parsed_url.query]:
1435 query = compat_parse_qs(component)
297a564b 1436 if start_time is None and 't' in query:
7c80519c 1437 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1438 if start_time is None and 'start' in query:
1439 start_time = parse_duration(query['start'][0])
297a564b
JMF
1440 if end_time is None and 'end' in query:
1441 end_time = parse_duration(query['end'][0])
7c80519c 1442
c5e8d7af
PH
1443 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1444 mobj = re.search(self._NEXT_URL_RE, url)
1445 if mobj:
7fd002c0 1446 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1447 video_id = self.extract_id(url)
c5e8d7af
PH
1448
1449 # Get video webpage
aa79ac0c 1450 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1451 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1452
1453 # Attempt to extract SWF player URL
e0df6211 1454 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1455 if mobj is not None:
1456 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1457 else:
1458 player_url = None
1459
d8d24a92
S
1460 dash_mpds = []
1461
1462 def add_dash_mpd(video_info):
1463 dash_mpd = video_info.get('dashmpd')
1464 if dash_mpd and dash_mpd[0] not in dash_mpds:
1465 dash_mpds.append(dash_mpd[0])
1466
c7121fa7
S
1467 is_live = None
1468 view_count = None
1469
1470 def extract_view_count(v_info):
1471 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1472
c5e8d7af 1473 # Get video info
6449cd80 1474 embed_webpage = None
c108eb73 1475 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1476 age_gate = True
1477 # We simulate the access to the video from www.youtube.com/v/{video_id}
1478 # this can be viewed without login into Youtube
beb95e77
CL
1479 url = proto + '://www.youtube.com/embed/%s' % video_id
1480 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1481 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1482 'video_id': video_id,
1483 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1484 'sts': self._search_regex(
beb95e77 1485 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1486 })
7e8c0af0 1487 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1488 video_info_webpage = self._download_webpage(
1489 video_info_url, video_id,
20436c30 1490 note='Refetching age-gated info webpage',
94bd3613 1491 errnote='unable to download video info webpage')
c5e8d7af 1492 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1493 add_dash_mpd(video_info)
c108eb73
JMF
1494 else:
1495 age_gate = False
bc93bdb5 1496 video_info = None
dc4e4f90 1497 sts = None
d8d24a92 1498 # Try looking directly into the video webpage
a72778d3
S
1499 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1500 if ytplayer_config:
4e62ebe2 1501 args = ytplayer_config['args']
d8d24a92
S
1502 if args.get('url_encoded_fmt_stream_map'):
1503 # Convert to the same format returned by compat_parse_qs
1504 video_info = dict((k, [v]) for k, v in args.items())
1505 add_dash_mpd(video_info)
6496ccb4
S
1506 # Rental video is not rented but preview is available (e.g.
1507 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1508 # https://github.com/rg3/youtube-dl/issues/10532)
1509 if not video_info and args.get('ypc_vid'):
1510 return self.url_result(
1511 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1512 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1513 is_live = True
dc4e4f90 1514 sts = ytplayer_config.get('sts')
0a3cf9ad
S
1515 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1516 # We also try looking in get_video_info since it may contain different dashmpd
1517 # URL that points to a DASH manifest with possibly different itag set (some itags
1518 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1519 # manifest pointed by get_video_info's dashmpd).
1520 # The general idea is to take a union of itags of both DASH manifests (for example
1521 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1522 self.report_video_info_webpage_download(video_id)
dc4e4f90
S
1523 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1524 query = {
1525 'video_id': video_id,
1526 'ps': 'default',
1527 'eurl': '',
1528 'gl': 'US',
1529 'hl': 'en',
1530 }
1531 if el:
1532 query['el'] = el
1533 if sts:
1534 query['sts'] = sts
810fb84d 1535 video_info_webpage = self._download_webpage(
dc4e4f90 1536 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1537 video_id, note=False,
dc4e4f90
S
1538 errnote='unable to download video info webpage',
1539 fatal=False, query=query)
1540 if not video_info_webpage:
1541 continue
0a3cf9ad 1542 get_video_info = compat_parse_qs(video_info_webpage)
fd545fc6 1543 add_dash_mpd(get_video_info)
c7121fa7
S
1544 if view_count is None:
1545 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1546 if not video_info:
1547 video_info = get_video_info
1548 if 'token' in get_video_info:
89ea063e
S
1549 # Different get_video_info requests may report different results, e.g.
1550 # some may report video unavailability, but some may serve it without
1551 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1552 # the original webpage as well as el=info and el=embedded get_video_info
1553 # requests report video unavailability due to geo restriction while
1554 # el=detailpage succeeds and returns valid data). This is probably
1555 # due to YouTube measures against IP ranges of hosting providers.
1556 # Working around by preferring the first succeeded video_info containing
1557 # the token if no such video_info yet was found.
44b2264f
S
1558 if 'token' not in video_info:
1559 video_info = get_video_info
4e62ebe2 1560 break
c5e8d7af
PH
1561 if 'token' not in video_info:
1562 if 'reason' in video_info:
af214c3a 1563 if 'The uploader has not made this video available in your country.' in video_info['reason']:
fd5c4aab
S
1564 regions_allowed = self._html_search_meta(
1565 'regionsAllowed', video_webpage, default=None)
1566 countries = regions_allowed.split(',') if regions_allowed else None
1567 self.raise_geo_restricted(
1568 msg=video_info['reason'][0], countries=countries)
d11271dd 1569 raise ExtractorError(
78caa52a 1570 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1571 expected=True, video_id=video_id)
c5e8d7af 1572 else:
d11271dd 1573 raise ExtractorError(
78caa52a 1574 '"token" parameter not in video info for unknown reason',
d11271dd 1575 video_id=video_id)
c5e8d7af 1576
cf7e015f
S
1577 # title
1578 if 'title' in video_info:
1579 video_title = video_info['title'][0]
1580 else:
1581 self._downloader.report_warning('Unable to extract video title')
1582 video_title = '_'
1583
1584 # description
9cafc3fd 1585 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1586 if video_description:
9cafc3fd 1587 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1588 <a\s+
25cb7a0e 1589 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1590 (?:title|href)="([^"]+)"\s+
25cb7a0e 1591 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1592 class="[^"]*"[^>]*>
23f13e97 1593 [^<]+\.{3}\s*
cf7e015f
S
1594 </a>
1595 ''', r'\1', video_description)
1596 video_description = clean_html(video_description)
1597 else:
1598 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1599 if fd_mobj:
1600 video_description = unescapeHTML(fd_mobj.group(1))
1601 else:
1602 video_description = ''
1603
5e1eddb9
S
1604 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1605 if not self._downloader.params.get('noplaylist'):
1606 entries = []
1607 feed_ids = []
6863631c 1608 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1609 for feed in multifeed_metadata_list.split(','):
6863631c
S
1610 # Unquote should take place before split on comma (,) since textual
1611 # fields may contain comma as well (see
1612 # https://github.com/rg3/youtube-dl/issues/8536)
1613 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1614 entries.append({
1615 '_type': 'url_transparent',
1616 'ie_key': 'Youtube',
1617 'url': smuggle_url(
1618 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1619 {'force_singlefeed': True}),
1620 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1621 })
1622 feed_ids.append(feed_data['id'][0])
1623 self.to_screen(
1624 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1625 % (', '.join(feed_ids), video_id))
1626 return self.playlist_result(entries, video_id, video_title, video_description)
1627 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1628
c7121fa7 1629 if view_count is None:
1c9c8de2 1630 view_count = extract_view_count(video_info)
1d699755 1631
c5e8d7af
PH
1632 # Check for "rental" videos
1633 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
c9612c04 1634 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af
PH
1635
1636 # Start extracting information
1637 self.report_information_extraction(video_id)
1638
1639 # uploader
1640 if 'author' not in video_info:
69ea8ca4 1641 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1642 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1643
1644 # uploader_id
1645 video_uploader_id = None
fd050249
S
1646 video_uploader_url = None
1647 mobj = re.search(
1648 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1649 video_webpage)
c5e8d7af 1650 if mobj is not None:
fd050249
S
1651 video_uploader_id = mobj.group('uploader_id')
1652 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1653 else:
69ea8ca4 1654 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1655
c5e8d7af 1656 # thumbnail image
7763b04e
JMF
1657 # We try first to get a high quality image:
1658 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1659 video_webpage, re.DOTALL)
1660 if m_thumb is not None:
1661 video_thumbnail = m_thumb.group(1)
1662 elif 'thumbnail_url' not in video_info:
69ea8ca4 1663 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1664 video_thumbnail = None
c5e8d7af 1665 else: # don't panic if we can't find it
7fd002c0 1666 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1667
1668 # upload date
9d0b581f
S
1669 upload_date = self._html_search_meta(
1670 'datePublished', video_webpage, 'upload date', default=None)
1671 if not upload_date:
1672 upload_date = self._search_regex(
1673 [r'(?s)id="eow-date.*?>(.*?)</span>',
79985209 1674 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
9d0b581f 1675 video_webpage, 'upload date', default=None)
9d0b581f 1676 upload_date = unified_strdate(upload_date)
c5e8d7af 1677
7caf9830
S
1678 video_license = self._html_search_regex(
1679 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1680 video_webpage, 'license', default=None)
1681
0cb58b02 1682 m_music = re.search(
7d02dcfa
S
1683 r'''(?x)
1684 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1685 <ul[^>]*>\s*
1686 <li>(?P<title>.+?)
1687 by (?P<creator>.+?)
1688 (?:
1689 \(.+?\)|
1690 <a[^>]*
1691 (?:
1692 \bhref=["\']/red[^>]*>| # drop possible
ea3f2049 1693 >\s*Listen ad-free with YouTube Red # YouTube Red ad
7d02dcfa
S
1694 )
1695 .*?
1696 )?</li
1697 ''',
0cb58b02
S
1698 video_webpage)
1699 if m_music:
1700 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1701 video_creator = clean_html(m_music.group('creator'))
1702 else:
1703 video_alt_title = video_creator = None
1704
12afdc2a
S
1705 m_episode = re.search(
1706 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1707 video_webpage)
1708 if m_episode:
1709 series = m_episode.group('series')
1710 season_number = int(m_episode.group('season'))
1711 episode_number = int(m_episode.group('episode'))
1712 else:
1713 series = season_number = episode_number = None
1714
55f7bd2d
PH
1715 m_cat_container = self._search_regex(
1716 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1717 video_webpage, 'categories', default=None)
ec8deefc 1718 if m_cat_container:
ad3bc6ac 1719 category = self._html_search_regex(
01ed5c9b 1720 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1721 default=None)
1722 video_categories = None if category is None else [category]
1723 else:
1724 video_categories = None
ec8deefc 1725
000b6b5a
S
1726 video_tags = [
1727 unescapeHTML(m.group('content'))
1728 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1729
f30a38be 1730 def _extract_count(count_name):
c93d53f5
S
1731 return str_to_int(self._search_regex(
1732 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1733 % re.escape(count_name),
1734 video_webpage, count_name, default=None))
1735
69ea8ca4
PH
1736 like_count = _extract_count('like')
1737 dislike_count = _extract_count('dislike')
336c3a69 1738
c5e8d7af 1739 # subtitles
d82134c3 1740 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1741 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af 1742
556dbe7f
S
1743 video_duration = try_get(
1744 video_info, lambda x: int_or_none(x['length_seconds'][0]))
1745 if not video_duration:
1746 video_duration = parse_duration(self._html_search_meta(
1747 'duration', video_webpage, 'video duration'))
c5e8d7af 1748
1fb07d10
JG
1749 # annotations
1750 video_annotations = None
1751 if self._downloader.params.get('writeannotations', False):
5f6a1245 1752 video_annotations = self._extract_annotations(video_id)
1fb07d10 1753
9cafc3fd
S
1754 chapters = self._extract_chapters(description_original, video_duration)
1755
c5e8d7af
PH
1756 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1757 self.report_rtmp_download()
dd27fd17
PH
1758 formats = [{
1759 'format_id': '_rtmp',
1760 'protocol': 'rtmp',
1761 'url': video_info['conn'][0],
1762 'player_url': player_url,
1763 }]
24270b03 1764 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1765 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1766 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1767 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1768 formats_spec = {}
82156fdb 1769 fmt_list = video_info.get('fmt_list', [''])[0]
1770 if fmt_list:
1771 for fmt in fmt_list.split(','):
1772 spec = fmt.split('/')
3318832e 1773 if len(spec) > 1:
1774 width_height = spec[1].split('x')
1775 if len(width_height) == 2:
1776 formats_spec[spec[0]] = {
1777 'resolution': spec[1],
1778 'width': int_or_none(width_height[0]),
1779 'height': int_or_none(width_height[1]),
1780 }
c9afb51c 1781 formats = []
00fe14fc 1782 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1783 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1784 if 'itag' not in url_data or 'url' not in url_data:
1785 continue
1786 format_id = url_data['itag'][0]
1787 url = url_data['url'][0]
1788
a49eccdf 1789 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
6449cd80 1790 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
beb95e77 1791 jsplayer_url_json = self._search_regex(
6449cd80
PH
1792 ASSETS_RE,
1793 embed_webpage if age_gate else video_webpage,
1794 'JS player URL (1)', default=None)
1795 if not jsplayer_url_json and not age_gate:
1796 # We need the embed website after all
1797 if embed_webpage is None:
1798 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1799 embed_webpage = self._download_webpage(
1800 embed_url, video_id, 'Downloading embed webpage')
1801 jsplayer_url_json = self._search_regex(
1802 ASSETS_RE, embed_webpage, 'JS player URL')
1803
beb95e77 1804 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1805 if player_url is None:
1806 player_url_json = self._search_regex(
1807 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1808 video_webpage, 'age gate player URL')
201e9eaa
PH
1809 player_url = json.loads(player_url_json)
1810
a49eccdf
YCH
1811 if 'sig' in url_data:
1812 url += '&signature=' + url_data['sig'][0]
1813 elif 's' in url_data:
1814 encrypted_sig = url_data['s'][0]
1815
201e9eaa 1816 if self._downloader.params.get('verbose'):
cf010131 1817 if player_url is None:
201e9eaa
PH
1818 player_version = 'unknown'
1819 player_desc = 'unknown'
1820 else:
1821 if player_url.endswith('swf'):
1822 player_version = self._search_regex(
1823 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1824 'flash player', fatal=False)
201e9eaa 1825 player_desc = 'flash player %s' % player_version
cf010131 1826 else:
201e9eaa 1827 player_version = self._search_regex(
b62985a9
YCH
1828 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1829 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
201e9eaa
PH
1830 player_url,
1831 'html5 player', fatal=False)
78caa52a 1832 player_desc = 'html5 player %s' % player_version
201e9eaa 1833
60064c53 1834 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1835 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1836 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1837
1838 signature = self._decrypt_signature(
1839 encrypted_sig, video_id, player_url, age_gate)
1840 url += '&signature=' + signature
1841 if 'ratebypass' not in url:
1842 url += '&ratebypass=yes'
c9afb51c 1843
94278f72
YCH
1844 dct = {
1845 'format_id': format_id,
1846 'url': url,
1847 'player_url': player_url,
1848 }
1849 if format_id in self._formats:
1850 dct.update(self._formats[format_id])
3318832e 1851 if format_id in formats_spec:
1852 dct.update(formats_spec[format_id])
94278f72 1853
aabc2be6
S
1854 # Some itags are not included in DASH manifest thus corresponding formats will
1855 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1856 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1857 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1858 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1859
1860 more_fields = {
c9afb51c 1861 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1862 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1863 'width': width,
1864 'height': height,
1865 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1866 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1867 }
94278f72
YCH
1868 for key, value in more_fields.items():
1869 if value:
1870 dct[key] = value
aabc2be6
S
1871 type_ = url_data.get('type', [None])[0]
1872 if type_:
1873 type_split = type_.split(';')
1874 kind_ext = type_split[0].split('/')
1875 if len(kind_ext) == 2:
94278f72
YCH
1876 kind, _ = kind_ext
1877 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1878 if kind in ('audio', 'video'):
1879 codecs = None
1880 for mobj in re.finditer(
1881 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1882 if mobj.group('key') == 'codecs':
1883 codecs = mobj.group('val')
1884 break
1885 if codecs:
6310acf5 1886 dct.update(parse_codecs(codecs))
aabc2be6 1887 formats.append(dct)
1d043b93
JMF
1888 elif video_info.get('hlsvp'):
1889 manifest_url = video_info['hlsvp'][0]
89beedd3
RA
1890 formats = []
1891 m3u8_formats = self._extract_m3u8_formats(
1892 manifest_url, video_id, 'mp4', fatal=False)
1893 for a_format in m3u8_formats:
1894 itag = self._search_regex(
1895 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1896 if itag:
1897 a_format['format_id'] = itag
1898 if itag in self._formats:
1899 dct = self._formats[itag].copy()
1900 dct.update(a_format)
1901 a_format = dct
1902 a_format['player_url'] = player_url
1903 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
049d71d8 1904 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
89beedd3 1905 formats.append(a_format)
c5e8d7af 1906 else:
8ceabd4d
S
1907 unavailable_message = self._html_search_regex(
1908 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1909 video_webpage, 'unavailable message', default=None)
1910 if unavailable_message:
1911 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1912 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1913
dd27fd17 1914 # Look for the DASH manifest
203fb43f 1915 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1916 dash_mpd_fatal = True
8ff648e4 1917 for mpd_url in dash_mpds:
d8d24a92 1918 dash_formats = {}
774e208f 1919 try:
05d0d131
YCH
1920 def decrypt_sig(mobj):
1921 s = mobj.group(1)
1922 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1923 return '/signature/%s' % dec_s
1924
8ff648e4 1925 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1926
8ff648e4 1927 for df in self._extract_mpd_formats(
1928 mpd_url, video_id, fatal=dash_mpd_fatal,
1929 formats_dict=self._formats):
d8d24a92
S
1930 # Do not overwrite DASH format found in some previous DASH manifest
1931 if df['format_id'] not in dash_formats:
1932 dash_formats[df['format_id']] = df
77c6fb5b
S
1933 # Additional DASH manifests may end up in HTTP Error 403 therefore
1934 # allow them to fail without bug report message if we already have
1935 # some DASH manifest succeeded. This is temporary workaround to reduce
1936 # burst of bug reports until we figure out the reason and whether it
1937 # can be fixed at all.
1938 dash_mpd_fatal = False
774e208f
PH
1939 except (ExtractorError, KeyError) as e:
1940 self.report_warning(
1941 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1942 if dash_formats:
04b3b3df
JMF
1943 # Remove the formats we found through non-DASH, they
1944 # contain less info and it can be wrong, because we use
1945 # fixed values (for example the resolution). See
1946 # https://github.com/rg3/youtube-dl/issues/5774 for an
1947 # example.
d80265cc 1948 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1949 formats.extend(dash_formats.values())
d80044c2 1950
6271f1ca
PH
1951 # Check for malformed aspect ratio
1952 stretched_m = re.search(
1953 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1954 video_webpage)
1955 if stretched_m:
313dfc45
LL
1956 w = float(stretched_m.group('w'))
1957 h = float(stretched_m.group('h'))
5faf9fed
S
1958 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1959 # We will only process correct ratios.
313dfc45 1960 if w > 0 and h > 0:
41f24c32 1961 ratio = w / h
313dfc45
LL
1962 for f in formats:
1963 if f.get('vcodec') != 'none':
1964 f['stretched_ratio'] = ratio
6271f1ca 1965
4bcc7bd1 1966 self._sort_formats(formats)
4ea3be0a 1967
d77ab8e2
S
1968 self.mark_watched(video_id, video_info)
1969
4ea3be0a 1970 return {
8bcc8756
JW
1971 'id': video_id,
1972 'uploader': video_uploader,
1973 'uploader_id': video_uploader_id,
fd050249 1974 'uploader_url': video_uploader_url,
8bcc8756 1975 'upload_date': upload_date,
7caf9830 1976 'license': video_license,
0cb58b02 1977 'creator': video_creator,
8bcc8756 1978 'title': video_title,
0cb58b02 1979 'alt_title': video_alt_title,
8bcc8756
JW
1980 'thumbnail': video_thumbnail,
1981 'description': video_description,
1982 'categories': video_categories,
000b6b5a 1983 'tags': video_tags,
8bcc8756 1984 'subtitles': video_subtitles,
360e1ca5 1985 'automatic_captions': automatic_captions,
8bcc8756
JW
1986 'duration': video_duration,
1987 'age_limit': 18 if age_gate else 0,
1988 'annotations': video_annotations,
9cafc3fd 1989 'chapters': chapters,
7e8c0af0 1990 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1991 'view_count': view_count,
4ea3be0a 1992 'like_count': like_count,
1993 'dislike_count': dislike_count,
2d30521a 1994 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1995 'formats': formats,
2fe1ff85 1996 'is_live': is_live,
7c80519c 1997 'start_time': start_time,
297a564b 1998 'end_time': end_time,
12afdc2a
S
1999 'series': series,
2000 'season_number': season_number,
2001 'episode_number': episode_number,
4ea3be0a 2002 }
c5e8d7af 2003
5f6a1245 2004
class YoutubeSharedVideoIE(InfoExtractor):
    """Extractor for ``youtube.com/shared?ci=<id>`` links.

    The ``ci`` value in the URL is only used to fetch the share page; the
    id of the video to extract is read from that page and the work is then
    handed over to the regular YouTube extractor.
    """

    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:shared'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a URL result pointing at the video behind a share link.

        The share page is downloaded and its ``videoId`` meta tag is read
        (the lookup is fatal, so a missing tag raises instead of returning).
        """
        shared_id = self._match_id(url)
        share_page = self._download_webpage(url, shared_id)

        # The id in the share URL differs from the id of the actual video,
        # which is looked up in the page's "videoId" meta tag.
        target_video_id = self._html_search_meta(
            'videoId', share_page, 'YouTube video id', fatal=True)

        return self.url_result(target_video_id, YoutubeIE.ie_key())
2036
2037
8e7aad20 2038class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2039 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2040 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2041 (?:https?://)?
2042 (?:\w+\.)?
c5e8d7af 2043 (?:
feaa5ad7
S
2044 youtube\.com/
2045 (?:
87dadd45 2046 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2047 \? (?:.*?[&;])*? (?:p|a|list)=
2048 | p/
2049 )|
2050 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2051 )
d67cc9fa 2052 (
a6857510 2053 (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
5f6a1245 2054 # Top tracks, they can also include dots
d67cc9fa
JMF
2055 |(?:MC)[\w\.]*
2056 )
c5e8d7af
PH
2057 .*
2058 |
d0ba5587
S
2059 (%(playlist_id)s)
2060 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2061 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
648e6a1f 2062 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
78caa52a 2063 IE_NAME = 'youtube:playlist'
81127aa5
PH
2064 _TESTS = [{
2065 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2066 'info_dict': {
2067 'title': 'ytdl test PL',
a1cf99d0 2068 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5
PH
2069 },
2070 'playlist_count': 3,
9291475f
PH
2071 }, {
2072 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2073 'info_dict': {
acf757f4 2074 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f
PH
2075 'title': 'YDL_Empty_List',
2076 },
2077 'playlist_count': 0,
4201ba13 2078 'skip': 'This playlist is private',
9291475f
PH
2079 }, {
2080 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2081 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2082 'info_dict': {
2083 'title': '29C3: Not my department',
acf757f4 2084 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
9291475f
PH
2085 },
2086 'playlist_count': 95,
2087 }, {
2088 'note': 'issue #673',
2089 'url': 'PLBB231211A4F62143',
2090 'info_dict': {
f46a8702 2091 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2092 'id': 'PLBB231211A4F62143',
9291475f
PH
2093 },
2094 'playlist_mincount': 26,
2095 }, {
2096 'note': 'Large playlist',
2097 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2098 'info_dict': {
2099 'title': 'Uploads from Cauchemar',
acf757f4 2100 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
9291475f
PH
2101 },
2102 'playlist_mincount': 799,
2103 }, {
2104 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2105 'info_dict': {
2106 'title': 'YDL_safe_search',
acf757f4 2107 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2108 },
2109 'playlist_count': 2,
4201ba13 2110 'skip': 'This playlist is private',
ac7553d0
PH
2111 }, {
2112 'note': 'embedded',
2d3d2997 2113 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2114 'playlist_count': 4,
2115 'info_dict': {
2116 'title': 'JODA15',
acf757f4 2117 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0 2118 }
87dadd45
S
2119 }, {
2120 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2121 'playlist_mincount': 485,
2122 'info_dict': {
2123 'title': '2017 華語最新單曲 (2/24更新)',
2124 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2125 }
6b08cdf6
PH
2126 }, {
2127 'note': 'Embedded SWF player',
2d3d2997 2128 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2129 'playlist_count': 4,
2130 'info_dict': {
2131 'title': 'JODA7',
acf757f4 2132 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
6b08cdf6 2133 }
4b7df0d3
JMF
2134 }, {
2135 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2136 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2137 'info_dict': {
acf757f4
PH
2138 'title': 'Uploads from Interstellar Movie',
2139 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2140 },
481cc733 2141 'playlist_mincount': 21,
dacb3a86
S
2142 }, {
2143 # Playlist URL that does not actually serve a playlist
2144 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2145 'info_dict': {
2146 'id': 'FqZTN594JQw',
2147 'ext': 'webm',
2148 'title': "Smiley's People 01 detective, Adventure Series, Action",
2149 'uploader': 'STREEM',
2150 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2151 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2152 'upload_date': '20150526',
2153 'license': 'Standard YouTube License',
2154 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2155 'categories': ['People & Blogs'],
2156 'tags': list,
2157 'like_count': int,
2158 'dislike_count': int,
2159 },
2160 'params': {
2161 'skip_download': True,
2162 },
2163 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2164 }, {
2165 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2166 'info_dict': {
2167 'id': 'yeWKywCrFtk',
2168 'ext': 'mp4',
2169 'title': 'Small Scale Baler and Braiding Rugs',
2170 'uploader': 'Backus-Page House Museum',
2171 'uploader_id': 'backuspagemuseum',
ec85ded8 2172 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733
S
2173 'upload_date': '20161008',
2174 'license': 'Standard YouTube License',
2175 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2176 'categories': ['Nonprofits & Activism'],
2177 'tags': list,
2178 'like_count': int,
2179 'dislike_count': int,
2180 },
2181 'params': {
2182 'noplaylist': True,
2183 'skip_download': True,
2184 },
feaa5ad7
S
2185 }, {
2186 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2187 'only_matching': True,
a6857510
S
2188 }, {
2189 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2190 'only_matching': True,
81127aa5 2191 }]
c5e8d7af 2192
880e1c52
JMF
def _real_initialize(self):
    """Initialise the extractor by delegating to ``self._login()``."""
    self._login()
2195
def _extract_mix(self, playlist_id):
    """Build a playlist result for a mix.

    Mixes are generated from a single video; the id of the playlist is
    just 'RD' + video_id, so the last 11 characters of ``playlist_id``
    are used as the first video to request.

    Watch pages are downloaded one after another, each time for the last
    video id collected so far, and the video ids linked together with this
    list id are harvested from every page.  Paging stops as soon as a page
    contributes no id that has not been seen before.
    """
    # The pattern only depends on the playlist id, so build it once.
    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected_ids = []
    seed_id = playlist_id[-11:]
    page_num = 1
    while True:
        webpage = self._download_webpage(
            'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
            playlist_id,
            'Downloading page {0} of Youtube mix'.format(page_num))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        unseen_ids = [
            candidate
            for candidate in orderedSet(re.findall(mix_entry_re, webpage))
            if candidate not in collected_ids]
        if not unseen_ids:
            break
        collected_ids.extend(unseen_ids)
        seed_id = collected_ids[-1]
        page_num += 1

    url_results = self._ids_to_results(collected_ids)

    # The title is taken from the last downloaded page: the first of these
    # class names that yields a non-empty element wins, otherwise the
    # result of the last lookup is used as is.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break

    return self.playlist_result(
        url_results, playlist_id, clean_html(title_span))
2227
def _extract_playlist(self, playlist_id):
    """Download a playlist page and turn it into a playlist result.

    Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
    only when the page has no playlist title and yields no entries at all.

    Raises ExtractorError (expected) when the page carries an alert saying
    that the playlist does not exist, is private, or that the parameters
    are invalid.  Any other alert, except the language chooser, is
    reported as a warning.
    """
    page = self._download_webpage(
        self._TEMPLATE_URL % playlist_id, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
    alert_texts = re.findall(
        r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page)
    for alert in alert_texts:
        alert = alert.strip()

        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*',
            alert)
        if unavailable:
            reason = unavailable.group('reason')
            pieces = ['This playlist %s' % reason]
            if 'private' in reason:
                pieces.append(', use --username or --netrc to access it')
            pieces.append('.')
            raise ExtractorError(''.join(pieces), expected=True)

        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)

        if re.match(r'[^<]*Choose your language[^<]*', alert):
            continue

        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    if playlist_title:
        has_videos = True
    else:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        # so probe for a first entry; a private sentinel tells "no entry"
        # apart from any value an entry could have.
        nothing = object()
        has_videos = next(self._entries(page, playlist_id), nothing) is not nothing

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    return has_videos, playlist
c5e8d7af 2269
def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should yield only a single video.

    Returns a ``(video_id, result)`` tuple:

    * ``(None, None)`` when the URL does not name a specific video;
    * ``(video_id, url_result)`` when it does and the ``noplaylist``
      parameter is set;
    * ``(video_id, None)`` when it does but the playlist is wanted.
    """
    # Check if it's a video-specific URL: first the "v" query parameter,
    # then the youtu.be / embed URL forms.
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)

    if not video_id:
        return None, None

    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
    return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
448830ce 2284
ebf1b291
S
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from a playlist URL.

    Raises ExtractorError when the URL does not match ``_VALID_URL``.
    """
    url_match = re.match(self._VALID_URL, url)
    if url_match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    # The playlist id is captured by the first or the second group
    playlist_id = url_match.group(1) or url_match.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL', 'PU')):
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if video_id and not has_videos:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist
448830ce 2309
c5e8d7af 2310
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a /channel/<id> page."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for channel_id (url is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its "UU..." uploads playlist."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        probe_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        uploads_source_id = False
        if probe_page is not False:
            uploads_source_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not uploads_source_id:
                # Second attempt: take the id from an app deep link
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    uploads_source_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if uploads_source_id and uploads_source_id.startswith('UC'):
            # Swap the "UC" prefix for "UU" to get the playlist id
            uploads_playlist_id = 'UU' + uploads_source_id[2:]
            playlist_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mark = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mark is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # With no entries at all, surface an alert from the page if present
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2400
2401
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for /user/<name>, /c/<name>, bare /<name> and ytuser: URLs."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if no other Youtube*IE in this module does.

        The regex of this extractor is too permissive, so it would also
        match URLs that another youtube extractor can handle.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, using 'user' when url has no prefix."""
        url_match = re.match(self._VALID_URL, url)
        prefix = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (prefix, url_match.group('id'))
2452
b05654f0 2453
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the /live page of a user or channel."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video named on the /live page, else the base URL.

        The video is used only when the page's og:type is 'video' and its
        videoId meta value is an 11-character id.  Otherwise, or when the
        page could not be downloaded, the URL without the /live suffix is
        returned.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default=None)
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        is_video_page = (
            page_type == 'video' and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2503
2504
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a user or channel.

    Only declares the URL pattern and tests; the extraction logic is
    inherited from the base class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
0c148415
S
2533
2534
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor for the "ytsearch" key."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query parameters merged into the search URL; subclasses override
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another by following the
        "Next" link, until a page adds no videos, no next link is found,
        or at least n videos have been collected.  The result is a
        playlist of at most n videos whose id is the query.

        Raises ExtractorError (expected) when a page contains a search
        message instead of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as n results are available: ">=" rather than ">"
            # so that no extra page is downloaded when exactly n videos
            # have already been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The length guard keeps a non-integer n (float('inf')) out of the slice
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2584
c9ae7b95 2585
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that sorts by upload date."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search URL query by the parent class
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2591
c9ae7b95 2592
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for /results?search_query=... (or q=...) URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos of the results page, titled with the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2614
2615
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<id> pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's /playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2633
2634
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is downloaded first, then further portions are
        fetched through the "load more" link until a portion adds no new
        video id or no such link is left.  The url argument is not used;
        the feed address is built from _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never stop the loop.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2682
2683
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list (playlist id 'WL')."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one was selected, else the WL playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist half of the (has_videos, playlist) pair is used
        return self._extract_playlist('WL')[1]
f459d170 2703
5f6a1245 2704
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list of the logged-in account."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id on the page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
15870e90
PH
2715
2716
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/recommended."""

    # Name of the feed and title of the resulting playlist
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2722
1ed5b5c9 2723
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/subscriptions."""

    # Name of the feed and title of the resulting playlist
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2729
1ed5b5c9 2730
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/history."""

    # Name of the feed and title of the resulting playlist
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2736
2737
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch URLs that lack a video id and explains the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2785
2786
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id is 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)