]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Allow empty attribute values in description regex
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
c5e8d7af 19 compat_parse_qs,
7fd002c0
S
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
c5e8d7af 25 compat_str,
4bb4a188
PH
26)
27from ..utils import (
c5e8d7af 28 clean_html,
9b9c5355 29 error_to_compat_str,
c5e8d7af 30 ExtractorError,
2d30521a 31 float_or_none,
4bb4a188
PH
32 get_element_by_attribute,
33 get_element_by_id,
dd27fd17 34 int_or_none,
94278f72 35 mimetype2ext,
4bb4a188 36 orderedSet,
7c80519c 37 parse_duration,
0cb58b02 38 remove_quotes,
041bc3ad 39 remove_start,
5c2266df 40 sanitized_Request,
cf7e015f 41 smuggle_url,
c93d53f5 42 str_to_int,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
81c2f20b 46 uppercase_escape,
6e6bc8da 47 urlencode_postdata,
af214c3a 48 ISO3166Utils,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the PREF cookie (containing hl=en) on .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed (including the case where the
        login page itself could not be downloaded).

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An ExtractorError is also raised when the login response carries a
        password error element.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure explicitly, as documented, instead of
            # falling through with an implicit None.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        login_data = urlencode_postdata(login_form_strs)

        req = sanitized_Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        # A non-empty password error element carries the message to show.
        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text was captured above.
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # Note the trailing space: the two literals are concatenated.
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            # Start from the hidden inputs of the challenge form and add the code.
            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Being served the challenge form again means the code was rejected.
            if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Still seeing the login form after posting credentials.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; no-op without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 189
8377574c 190
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """
        Yield entries from page and from every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched as JSON from the data-uix-load-more-href link found in the
        most recent load-more widget markup.
        """
        html_chunk = page
        widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(html_chunk):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                # No further "Load more" link: nothing left to fetch
                return

            next_url = 'https://youtube.com/%s' % load_more.group('more')
            progress_note = 'Downloading page #%s' % page_num
            more = self._download_json(
                next_url, playlist_id, progress_note,
                transform_source=uppercase_escape)

            html_chunk = more['content_html']
            if not html_chunk.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
213
061a75ed
S
214
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page(self, page):
        """
        Return (video_id, title) pairs for the self._VIDEO_RE matches in page.

        Ids are kept in first-seen order without duplicates; a later match
        only contributes its title when the stored title is still empty.
        _VIDEO_RE is not defined in this class; it has to provide the named
        groups 'id' and 'title'.
        """
        seen_ids = []
        seen_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            current_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the check compares the 'id' group (not 'index') to '0' - confirm intent
            if current_id == '0' and 'index' in match.groupdict():
                continue
            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if current_id in seen_ids:
                pos = seen_ids.index(current_id)
                if title and not seen_titles[pos]:
                    seen_titles[pos] = title
            else:
                seen_ids.append(current_id)
                seen_titles.append(title)
        return zip(seen_ids, seen_titles)
239
240
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist link in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet de-duplicates the ids before results are built
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries."""
        list_id = self._match_id(url)
        page_html = self._download_webpage(url, list_id)
        # The title lookup is non-fatal (fatal=False)
        list_title = self._og_search_title(page_html, fatal=False)
        entries = self._entries(page_html, list_id)
        return self.playlist_result(entries, list_id, list_title)
0c148415
S
254
255
360e1ca5 256class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 257 IE_DESC = 'YouTube.com'
cb7dfeea 258 _VALID_URL = r"""(?x)^
c5e8d7af 259 (
edb53e2d 260 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 261 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 262 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 263 (?:www\.)?pwnyoutube\.com/|
f7000f3a 264 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
265 tube\.majestyc\.net/|
266 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
267 (?:.*?\#/)? # handle anchor (#/) redirect urls
268 (?: # the various things that can precede the ID:
ac7553d0 269 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 270 |(?: # or the v= param in all its forms
f7000f3a 271 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 272 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 273 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
274 v=
275 )
f4b05232 276 ))
cbaed4bb
S
277 |(?:
278 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
279 vid\.plus| # or vid.plus/xxxx
280 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 281 )/
edb53e2d 282 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 283 )
c5e8d7af 284 )? # all until now is optional -> you can pass the naked ID
8963d9c2 285 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
9291475f 286 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
287 (?(1).+)? # if we found the ID, everything can follow
288 $"""
c5e8d7af 289 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 290 _formats = {
c2d3cb4c 291 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
292 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
293 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
294 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
295 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
296 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
297 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
298 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 299 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 300 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
301 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
302 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
303 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
304 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
305 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 306 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 307 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
308 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 309
310
311 # 3D videos
c2d3cb4c 312 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
313 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
314 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
315 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 316 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
317 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
318 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 319
96fb5605 320 # Apple HTTP Live Streaming
11f12195 321 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 322 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
323 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
324 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
325 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
326 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 327 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
328 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
329
330 # DASH mp4 video
c2d3cb4c 331 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
332 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
333 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
334 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
335 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
336 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
337 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
a6c2c244
YCH
338 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
339 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
340 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
341 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
836a086c 342
f6f1fc92 343 # Dash mp4 audio
c2d3cb4c 344 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
345 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
346 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
347
348 # Dash webm
a6c2c244
YCH
349 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
350 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
351 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
352 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
353 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
354 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
355 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
356 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
357 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
358 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
359 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
360 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
361 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
362 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
363 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
4c6b4764 364 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
a6c2c244
YCH
365 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
366 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
367 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
368 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
369 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
370 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
2c62dc26
PH
371
372 # Dash webm audio
a6c2c244
YCH
373 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
374 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 375
0857baad 376 # Dash webm audio with opus inside
a6c2c244
YCH
377 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
378 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
379 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
0857baad 380
ce6b9a2d
PH
381 # RTMP (unnamed)
382 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 383 }
23d17e4b 384 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 385
78caa52a 386 IE_NAME = 'youtube'
2eb88d95
PH
387 _TESTS = [
388 {
b67d6314 389 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
390 'info_dict': {
391 'id': 'BaW_jenozKc',
392 'ext': 'mp4',
393 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
394 'uploader': 'Philipp Hagemeister',
395 'uploader_id': 'phihag',
fd050249 396 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 397 'upload_date': '20121002',
7caf9830 398 'license': 'Standard YouTube License',
4bc3a23e
PH
399 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
400 'categories': ['Science & Technology'],
000b6b5a 401 'tags': ['youtube-dl'],
3e7c1224
PH
402 'like_count': int,
403 'dislike_count': int,
7c80519c 404 'start_time': 1,
297a564b 405 'end_time': 9,
2eb88d95 406 }
0e853ca4 407 },
0e853ca4 408 {
4bc3a23e
PH
409 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
410 'note': 'Test generic use_cipher_signature video (#897)',
411 'info_dict': {
412 'id': 'UxxajLWwzqY',
413 'ext': 'mp4',
414 'upload_date': '20120506',
415 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 416 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 417 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
418 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
419 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
420 'iconic ep', 'iconic', 'love', 'it'],
4bc3a23e
PH
421 'uploader': 'Icona Pop',
422 'uploader_id': 'IconaPop',
fd050249 423 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 424 'license': 'Standard YouTube License',
0cb58b02 425 'creator': 'Icona Pop',
2eb88d95 426 }
c108eb73
JMF
427 },
428 {
4bc3a23e
PH
429 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
430 'note': 'Test VEVO video with age protection (#956)',
431 'info_dict': {
432 'id': '07FYdnEawAQ',
433 'ext': 'mp4',
434 'upload_date': '20130703',
435 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 436 'alt_title': 'Tunnel Vision',
4bc3a23e
PH
437 'description': 'md5:64249768eec3bc4276236606ea996373',
438 'uploader': 'justintimberlakeVEVO',
439 'uploader_id': 'justintimberlakeVEVO',
fd050249 440 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 441 'license': 'Standard YouTube License',
0cb58b02 442 'creator': 'Justin Timberlake',
34952f09 443 'age_limit': 18,
c108eb73
JMF
444 }
445 },
fccd3771 446 {
4bc3a23e
PH
447 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
448 'note': 'Embed-only video (#1746)',
449 'info_dict': {
450 'id': 'yZIXLfi8CZQ',
451 'ext': 'mp4',
452 'upload_date': '20120608',
453 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
454 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
455 'uploader': 'SET India',
94bfcd23 456 'uploader_id': 'setindia',
fd050249 457 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 458 'license': 'Standard YouTube License',
94bfcd23 459 'age_limit': 18,
fccd3771
PH
460 }
461 },
11b56058 462 {
b67d6314 463 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
464 'note': 'Use the first video ID in the URL',
465 'info_dict': {
466 'id': 'BaW_jenozKc',
467 'ext': 'mp4',
468 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
469 'uploader': 'Philipp Hagemeister',
470 'uploader_id': 'phihag',
fd050249 471 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 472 'upload_date': '20121002',
7caf9830 473 'license': 'Standard YouTube License',
11b56058
PM
474 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
475 'categories': ['Science & Technology'],
476 'tags': ['youtube-dl'],
477 'like_count': int,
478 'dislike_count': int,
34a7de29
S
479 },
480 'params': {
481 'skip_download': True,
482 },
11b56058 483 },
dd27fd17 484 {
4bc3a23e
PH
485 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
486 'note': '256k DASH audio (format 141) via DASH manifest',
487 'info_dict': {
488 'id': 'a9LDPn-MO4I',
489 'ext': 'm4a',
490 'upload_date': '20121002',
491 'uploader_id': '8KVIDEO',
fd050249 492 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
493 'description': '',
494 'uploader': '8KVIDEO',
7caf9830 495 'license': 'Standard YouTube License',
4bc3a23e 496 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 497 },
4bc3a23e
PH
498 'params': {
499 'youtube_include_dash_manifest': True,
500 'format': '141',
4919603f 501 },
dd27fd17 502 },
3489b7d2
JMF
503 # DASH manifest with encrypted signature
504 {
78caa52a
PH
505 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
506 'info_dict': {
507 'id': 'IB3lcPjvWLA',
508 'ext': 'm4a',
b766eb27
JMF
509 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
510 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
78caa52a
PH
511 'uploader': 'AfrojackVEVO',
512 'uploader_id': 'AfrojackVEVO',
513 'upload_date': '20131011',
7caf9830 514 'license': 'Standard YouTube License',
3489b7d2 515 },
4bc3a23e 516 'params': {
78caa52a
PH
517 'youtube_include_dash_manifest': True,
518 'format': '141',
3489b7d2
JMF
519 },
520 },
aaeb86f6
S
521 # JS player signature function name containing $
522 {
523 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
524 'info_dict': {
525 'id': 'nfWlot6h_JM',
526 'ext': 'm4a',
527 'title': 'Taylor Swift - Shake It Off',
0cb58b02 528 'alt_title': 'Shake It Off',
f57b7835 529 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
aaeb86f6
S
530 'uploader': 'TaylorSwiftVEVO',
531 'uploader_id': 'TaylorSwiftVEVO',
532 'upload_date': '20140818',
7caf9830 533 'license': 'Standard YouTube License',
0cb58b02 534 'creator': 'Taylor Swift',
aaeb86f6
S
535 },
536 'params': {
537 'youtube_include_dash_manifest': True,
538 'format': '141',
539 },
540 },
aa79ac0c
PH
541 # Controversy video
542 {
543 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
544 'info_dict': {
545 'id': 'T4XJQO3qol8',
546 'ext': 'mp4',
547 'upload_date': '20100909',
548 'uploader': 'The Amazing Atheist',
549 'uploader_id': 'TheAmazingAtheist',
fd050249 550 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 551 'license': 'Standard YouTube License',
aa79ac0c
PH
552 'title': 'Burning Everyone\'s Koran',
553 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
554 }
c522adb1
JMF
555 },
556 # Normal age-gate video (No vevo, embed allowed)
557 {
558 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
559 'info_dict': {
560 'id': 'HtVdAasjOgU',
561 'ext': 'mp4',
562 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
9ed99402 563 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
c522adb1
JMF
564 'uploader': 'The Witcher',
565 'uploader_id': 'WitcherGame',
fd050249 566 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 567 'upload_date': '20140605',
7caf9830 568 'license': 'Standard YouTube License',
34952f09 569 'age_limit': 18,
c522adb1
JMF
570 },
571 },
fccae2b9
S
572 # Age-gate video with encrypted signature
573 {
574 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
575 'info_dict': {
576 'id': '6kLq3WMV1nU',
577 'ext': 'mp4',
578 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
579 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
580 'uploader': 'LloydVEVO',
581 'uploader_id': 'LloydVEVO',
fd050249 582 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 583 'upload_date': '20110629',
7caf9830 584 'license': 'Standard YouTube License',
34952f09 585 'age_limit': 18,
fccae2b9
S
586 },
587 },
774e208f
PH
588 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
589 {
590 'url': '__2ABJjxzNo',
591 'info_dict': {
592 'id': '__2ABJjxzNo',
593 'ext': 'mp4',
594 'upload_date': '20100430',
595 'uploader_id': 'deadmau5',
fd050249 596 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 597 'creator': 'deadmau5',
774e208f
PH
598 'description': 'md5:12c56784b8032162bb936a5f76d55360',
599 'uploader': 'deadmau5',
7caf9830 600 'license': 'Standard YouTube License',
774e208f 601 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 602 'alt_title': 'Some Chords',
774e208f
PH
603 },
604 'expected_warnings': [
605 'DASH manifest missing',
606 ]
e52a40ab
PH
607 },
608 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
609 {
610 'url': 'lqQg6PlCWgI',
611 'info_dict': {
612 'id': 'lqQg6PlCWgI',
613 'ext': 'mp4',
90227264 614 'upload_date': '20150827',
cbe2bd91 615 'uploader_id': 'olympic',
fd050249 616 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 617 'license': 'Standard YouTube License',
cbe2bd91
PH
618 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
619 'uploader': 'Olympics',
620 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
621 },
622 'params': {
623 'skip_download': 'requires avconv',
e52a40ab 624 }
cbe2bd91 625 },
6271f1ca
PH
626 # Non-square pixels
627 {
628 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
629 'info_dict': {
630 'id': '_b-2C3KPAM0',
631 'ext': 'mp4',
632 'stretched_ratio': 16 / 9.,
633 'upload_date': '20110310',
634 'uploader_id': 'AllenMeow',
fd050249 635 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
636 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
637 'uploader': '孫艾倫',
7caf9830 638 'license': 'Standard YouTube License',
6271f1ca
PH
639 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
640 },
06b491eb
S
641 },
642 # url_encoded_fmt_stream_map is empty string
643 {
644 'url': 'qEJwOuvDf7I',
645 'info_dict': {
646 'id': 'qEJwOuvDf7I',
f57b7835 647 'ext': 'webm',
06b491eb
S
648 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
649 'description': '',
650 'upload_date': '20150404',
651 'uploader_id': 'spbelect',
652 'uploader': 'Наблюдатели Петербурга',
653 },
654 'params': {
655 'skip_download': 'requires avconv',
e323cf3f
S
656 },
657 'skip': 'This live event has ended.',
06b491eb 658 },
da77d856
S
659 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
660 {
661 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
662 'info_dict': {
663 'id': 'FIl7x6_3R5Y',
664 'ext': 'mp4',
665 'title': 'md5:7b81415841e02ecd4313668cde88737a',
666 'description': 'md5:116377fd2963b81ec4ce64b542173306',
667 'upload_date': '20150625',
668 'uploader_id': 'dorappi2000',
fd050249 669 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 670 'uploader': 'dorappi2000',
7caf9830 671 'license': 'Standard YouTube License',
da77d856
S
672 'formats': 'mincount:33',
673 },
2ee8f5d8 674 },
8a1a26ce
YCH
675 # DASH manifest with segment_list
676 {
677 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
678 'md5': '8ce563a1d667b599d21064e982ab9e31',
679 'info_dict': {
680 'id': 'CsmdDsKjzN8',
681 'ext': 'mp4',
17ee98e1 682 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
683 'uploader': 'Airtek',
684 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
685 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 686 'license': 'Standard YouTube License',
8a1a26ce
YCH
687 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
688 },
689 'params': {
690 'youtube_include_dash_manifest': True,
691 'format': '135', # bestvideo
692 }
2ee8f5d8 693 },
cf7e015f
S
694 {
695 # Multifeed videos (multiple cameras), URL is for Main Camera
696 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
697 'info_dict': {
698 'id': 'jqWvoWXjCVs',
699 'title': 'teamPGP: Rocket League Noob Stream',
700 'description': 'md5:dc7872fb300e143831327f1bae3af010',
701 },
702 'playlist': [{
703 'info_dict': {
704 'id': 'jqWvoWXjCVs',
705 'ext': 'mp4',
706 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
707 'description': 'md5:dc7872fb300e143831327f1bae3af010',
708 'upload_date': '20150721',
709 'uploader': 'Beer Games Beer',
710 'uploader_id': 'beergamesbeer',
fd050249 711 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 712 'license': 'Standard YouTube License',
cf7e015f
S
713 },
714 }, {
715 'info_dict': {
716 'id': '6h8e8xoXJzg',
717 'ext': 'mp4',
718 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
719 'description': 'md5:dc7872fb300e143831327f1bae3af010',
720 'upload_date': '20150721',
721 'uploader': 'Beer Games Beer',
722 'uploader_id': 'beergamesbeer',
fd050249 723 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 724 'license': 'Standard YouTube License',
cf7e015f
S
725 },
726 }, {
727 'info_dict': {
728 'id': 'PUOgX5z9xZw',
729 'ext': 'mp4',
730 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
731 'description': 'md5:dc7872fb300e143831327f1bae3af010',
732 'upload_date': '20150721',
733 'uploader': 'Beer Games Beer',
734 'uploader_id': 'beergamesbeer',
fd050249 735 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 736 'license': 'Standard YouTube License',
cf7e015f
S
737 },
738 }, {
739 'info_dict': {
740 'id': 'teuwxikvS5k',
741 'ext': 'mp4',
742 'title': 'teamPGP: Rocket League Noob Stream (zim)',
743 'description': 'md5:dc7872fb300e143831327f1bae3af010',
744 'upload_date': '20150721',
745 'uploader': 'Beer Games Beer',
746 'uploader_id': 'beergamesbeer',
fd050249 747 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 748 'license': 'Standard YouTube License',
cf7e015f
S
749 },
750 }],
751 'params': {
752 'skip_download': True,
753 },
cbaed4bb 754 },
f9f49d87
S
755 {
756 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
757 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
758 'info_dict': {
759 'id': 'gVfLd0zydlo',
760 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
761 },
762 'playlist_count': 2,
763 },
cbaed4bb
S
764 {
765 'url': 'http://vid.plus/FlRa-iH7PGw',
766 'only_matching': True,
0e49d9a6 767 },
6d4fc66b
S
768 {
769 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
770 'only_matching': True,
771 },
0e49d9a6 772 {
61f92af1 773 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
774 # Also tests cut-off URL expansion in video description (see
775 # https://github.com/rg3/youtube-dl/issues/1892,
776 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
777 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
778 'info_dict': {
779 'id': 'lsguqyKfVQg',
780 'ext': 'mp4',
781 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 782 'alt_title': 'Dark Walk',
0e49d9a6
LL
783 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
784 'upload_date': '20151119',
785 'uploader_id': 'IronSoulElf',
fd050249 786 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 787 'uploader': 'IronSoulElf',
7caf9830 788 'license': 'Standard YouTube License',
0cb58b02 789 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
790 },
791 'params': {
792 'skip_download': True,
793 },
794 },
61f92af1
S
795 {
796 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
797 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
798 'only_matching': True,
799 },
313dfc45
LL
800 {
801 # Video with yt:stretch=17:0
802 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
803 'info_dict': {
804 'id': 'Q39EVAstoRM',
805 'ext': 'mp4',
806 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
807 'description': 'md5:ee18a25c350637c8faff806845bddee9',
808 'upload_date': '20151107',
809 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
810 'uploader': 'CH GAMER DROID',
811 },
812 'params': {
813 'skip_download': True,
814 },
815 },
7caf9830
S
816 {
817 # Video licensed under Creative Commons
818 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
819 'info_dict': {
820 'id': 'M4gD1WSo5mA',
821 'ext': 'mp4',
822 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
823 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
824 'upload_date': '20150127',
825 'uploader_id': 'BerkmanCenter',
fd050249 826 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
7caf9830
S
827 'uploader': 'BerkmanCenter',
828 'license': 'Creative Commons Attribution license (reuse allowed)',
829 },
830 'params': {
831 'skip_download': True,
832 },
833 },
fd050249
S
834 {
835 # Channel-like uploader_url
836 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
837 'info_dict': {
838 'id': 'eQcmzGIKrzg',
839 'ext': 'mp4',
840 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
841 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
842 'upload_date': '20151119',
843 'uploader': 'Bernie 2016',
844 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
845 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
846 'license': 'Creative Commons Attribution license (reuse allowed)',
847 },
848 'params': {
849 'skip_download': True,
850 },
851 },
040ac686
S
852 {
853 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
854 'only_matching': True,
855 }
2eb88d95
PH
856 ]
857
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Deciphering functions already built during this run; filled in by
    # _decrypt_signature and keyed by (player URL, signature cache id).
    self._player_cache = dict()
e0df6211 861
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 865
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
869
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for the video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
873
def report_rtmp_download(self):
    """Tell the user that the download is going to use RTMP."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 877
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the shape of a signature as a string.

    The signature is split on '.' and the length of every part is joined
    back with '.', e.g. a signature with parts of 3 and 5 characters
    yields '3.5'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
881
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The player type (file extension) and player id are parsed out of
    player_url. If a cached index list exists for this player and this
    signature shape, the returned function simply picks the characters of
    its input at those indices. Otherwise the player is downloaded, the
    deciphering routine is extracted from it (JS or SWF), and the index
    list it produces for a probe string is stored in the cache.

    video_id is only passed on to the download helpers; example_sig is a
    signature whose shape (see _signature_cache_id) and length are used for
    the cache key and for the probe string.

    Raises ExtractorError if the player URL cannot be parsed, the derived
    cache id is not a plain file name, or the player type is neither 'js'
    nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id is handed to the cache as a name, so it must not contain any
    # directory component. Checked explicitly rather than with assert so the
    # check is not stripped when Python runs with -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    # Reject unsupported player types explicitly (and before any download)
    # instead of relying on assert, which would leave `res` unbound under -O.
    if player_type not in ('js', 'swf'):
        raise ExtractorError('Invalid player type %r' % player_type)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    else:  # 'swf'
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)

    # Probe the function with a string whose i-th character has code point
    # i; the code points of the result are then the source indices, which is
    # exactly what the cached fast path above replays.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
927
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the deciphering function.

    func is probed with a string whose i-th character has code point i, so
    the code points of its output are the input indices it selects. Those
    indices are compressed into slice / index expressions on `s` and shown
    via to_screen, guarded by a check on the signature's shape.
    """
    def gen_sig_code(indices):
        def _slice_expr(first, last, stride):
            # Python slices exclude the stop value, so go one stride further.
            stop = last + stride
            first_txt = str(first) if first != 0 else ''
            stop_txt = (':%d' % stop) if stop >= 0 else ':'
            stride_txt = (':%d' % stride) if stride != 1 else ''
            return 's[%s%s%s]' % (first_txt, stop_txt, stride_txt)

        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for i, prev in zip(indices[1:], indices[:-1]):
            delta = i - prev
            if stride is not None:
                # Inside a run: either it continues or it ends at prev.
                if delta == stride:
                    continue
                yield _slice_expr(run_start, prev, stride)
                stride = None
                continue
            if delta in (-1, 1):
                # prev opens a new ascending/descending run.
                stride = delta
                run_start = prev
                continue
            yield 's[%d]' % prev
        # Flush whatever is still pending for the last index.
        if stride is None:
            yield 's[%d]' % i
        else:
            yield _slice_expr(run_start, i, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    picked_indices = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(picked_indices))
    shape = ', '.join(
        compat_str(len(part)) for part in example_sig.split('.'))
    signature_id_tuple = '(%s)' % shape
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 966
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a one-argument callable wrapping the JS signature function.

    The function name is located in the player code with a regex, the
    function is extracted with JSInterpreter, and the returned callable
    passes its argument to it as a single-element argument list.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    js_function = JSInterpreter(jscode).extract_function(sig_func_name)

    def decipher(s):
        return js_function([s])

    return decipher
975
def _parse_sig_swf(self, file_contents):
    """Return a one-argument callable wrapping the SWF decipher routine.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from the SWF data with SWFInterpreter; the returned callable passes its
    argument to it as a single-element argument list.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    swf_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        return swf_function([s])

    return decipher
982
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    The deciphering function is built once per (player URL, signature
    shape) pair and kept in self._player_cache. Any exception raised while
    building or applying it is re-raised as ExtractorError carrying the
    formatted traceback. age_gate is accepted but not used here.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Protocol-relative player URLs are fetched over HTTPS.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url

    try:
        cache = self._player_cache
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in cache:
            cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1006
def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language codes to lists of subtitle formats.

    The track list is downloaded from the timedtext endpoint; for each
    language (first track wins) one {'url', 'ext'} entry is produced per
    extension in self._SUBTITLE_FORMATS. A warning is reported and {} is
    returned when the list cannot be downloaded or contains no tracks.
    webpage is not used here.
    """
    try:
        track_list = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep only the first track seen for each language.
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1038
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config JSON from the page, or None.

    None is returned when no config assignment is found in the webpage;
    JSON parsing itself is non-fatal.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. Yet working around by trying more
    # concrete regex first keeping in mind proper quoted string handling
    # to be implemented in future that will replace this workaround (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1056
def _get_automatic_captions(self, video_id, webpage):
    """Return a dict mapping language codes to automatic caption formats.

    The webpage is needed for getting the captions URL; it is passed in as
    an argument to speed up the process. Two sources inside the player
    config are supported: 'ttsurl' (the list of target languages is
    downloaded from it) and, failing that, 'caption_tracks' together with
    'caption_translation_languages'. A warning is reported and {} is
    returned when nothing usable is found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}

    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_url = tts_url + '&' + compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            caption_list = self._download_xml(list_url, video_id)
            original_track = caption_list.find('track')
            if original_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_track.attrib['lang_code']
            caption_kind = original_track.attrib.get('kind', '')

            captions = {}
            for target in caption_list.findall('target'):
                target_lang = target.attrib['lang_code']
                captions[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return captions

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        translation_languages = args['caption_translation_languages']
        base_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        parsed_base_url = compat_urllib_parse_urlparse(base_url)
        base_qs = compat_parse_qs(parsed_base_url.query)

        captions = {}
        for lang in translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            target_lang = lang_qs.get('lc', [None])[0]
            if not target_lang:
                continue
            formats = []
            for ext in self._SUBTITLE_FORMATS:
                # The query dict is reused; only these two keys change.
                base_qs.update({
                    'tlang': [target_lang],
                    'fmt': [ext],
                })
                new_query = compat_urllib_parse_urlencode(base_qs, True)
                formats.append({
                    'url': compat_urlparse.urlunparse(
                        parsed_base_url._replace(query=new_query)),
                    'ext': ext,
                })
            captions[target_lang] = formats
        return captions
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1138
d77ab8e2
S
def _mark_watched(self, video_id, video_info):
    """Request the playback stats URL so the video is marked as watched.

    Does nothing when video_info has no 'videostats_playback_base_url'.
    The request is non-fatal.
    """
    base_url = video_info.get('videostats_playback_base_url', [None])[0]
    if not base_url:
        return
    parsed_base_url = compat_urlparse.urlparse(base_url)
    query = compat_urlparse.parse_qs(parsed_base_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    query.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_base_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1161
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1169
1d043b93
JMF
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download a formats manifest and return a dict mapping itag to URL.

    Every non-empty manifest line that does not start with '#' is treated
    as a format URL; its itag is taken from the 'itag/<digits>/' path part.
    """
    manifest = self._download_webpage(
        manifest_url, video_id, 'Downloading formats manifest')
    url_map = {}
    for line in manifest.split('\n'):
        # Skip blank lines and '#' lines; everything else is a URL.
        if not line or line.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', line, 'itag')
        url_map[itag] = line
    return url_map
1184
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the raw annotations document for the video."""
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1fb07d10 1188
c5e8d7af 1189 def _real_extract(self, url):
cf7e015f
S
1190 url, smuggled_data = unsmuggle_url(url, {})
1191
7e8c0af0 1192 proto = (
78caa52a
PH
1193 'http' if self._downloader.params.get('prefer_insecure', False)
1194 else 'https')
7e8c0af0 1195
7c80519c 1196 start_time = None
297a564b 1197 end_time = None
7c80519c
JMF
1198 parsed_url = compat_urllib_parse_urlparse(url)
1199 for component in [parsed_url.fragment, parsed_url.query]:
1200 query = compat_parse_qs(component)
297a564b 1201 if start_time is None and 't' in query:
7c80519c 1202 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1203 if start_time is None and 'start' in query:
1204 start_time = parse_duration(query['start'][0])
297a564b
JMF
1205 if end_time is None and 'end' in query:
1206 end_time = parse_duration(query['end'][0])
7c80519c 1207
c5e8d7af
PH
1208 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1209 mobj = re.search(self._NEXT_URL_RE, url)
1210 if mobj:
7fd002c0 1211 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1212 video_id = self.extract_id(url)
c5e8d7af
PH
1213
1214 # Get video webpage
aa79ac0c 1215 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1216 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1217
1218 # Attempt to extract SWF player URL
e0df6211 1219 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1220 if mobj is not None:
1221 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1222 else:
1223 player_url = None
1224
d8d24a92
S
1225 dash_mpds = []
1226
1227 def add_dash_mpd(video_info):
1228 dash_mpd = video_info.get('dashmpd')
1229 if dash_mpd and dash_mpd[0] not in dash_mpds:
1230 dash_mpds.append(dash_mpd[0])
1231
c5e8d7af 1232 # Get video info
6449cd80 1233 embed_webpage = None
2fe1ff85 1234 is_live = None
c108eb73 1235 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1236 age_gate = True
1237 # We simulate the access to the video from www.youtube.com/v/{video_id}
1238 # this can be viewed without login into Youtube
beb95e77
CL
1239 url = proto + '://www.youtube.com/embed/%s' % video_id
1240 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1241 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1242 'video_id': video_id,
1243 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1244 'sts': self._search_regex(
beb95e77 1245 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1246 })
7e8c0af0 1247 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1248 video_info_webpage = self._download_webpage(
1249 video_info_url, video_id,
20436c30 1250 note='Refetching age-gated info webpage',
94bd3613 1251 errnote='unable to download video info webpage')
c5e8d7af 1252 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1253 add_dash_mpd(video_info)
c108eb73
JMF
1254 else:
1255 age_gate = False
bc93bdb5 1256 video_info = None
d8d24a92 1257 # Try looking directly into the video webpage
a72778d3
S
1258 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1259 if ytplayer_config:
4e62ebe2 1260 args = ytplayer_config['args']
d8d24a92
S
1261 if args.get('url_encoded_fmt_stream_map'):
1262 # Convert to the same format returned by compat_parse_qs
1263 video_info = dict((k, [v]) for k, v in args.items())
1264 add_dash_mpd(video_info)
2fe1ff85
JMF
1265 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1266 is_live = True
0a3cf9ad
S
1267 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1268 # We also try looking in get_video_info since it may contain different dashmpd
1269 # URL that points to a DASH manifest with possibly different itag set (some itags
1270 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1271 # manifest pointed by get_video_info's dashmpd).
1272 # The general idea is to take a union of itags of both DASH manifests (for example
1273 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1274 self.report_video_info_webpage_download(video_id)
0a3cf9ad 1275 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
810fb84d
PH
1276 video_info_url = (
1277 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1278 % (proto, video_id, el_type))
1279 video_info_webpage = self._download_webpage(
1280 video_info_url,
4e62ebe2
JMF
1281 video_id, note=False,
1282 errnote='unable to download video info webpage')
0a3cf9ad 1283 get_video_info = compat_parse_qs(video_info_webpage)
87dc4511
JMF
1284 if get_video_info.get('use_cipher_signature') != ['True']:
1285 add_dash_mpd(get_video_info)
0a3cf9ad
S
1286 if not video_info:
1287 video_info = get_video_info
1288 if 'token' in get_video_info:
89ea063e
S
1289 # Different get_video_info requests may report different results, e.g.
1290 # some may report video unavailability, but some may serve it without
1291 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1292 # the original webpage as well as el=info and el=embedded get_video_info
1293 # requests report video unavailability due to geo restriction while
1294 # el=detailpage succeeds and returns valid data). This is probably
1295 # due to YouTube measures against IP ranges of hosting providers.
1296 # Working around by preferring the first succeeded video_info containing
1297 # the token if no such video_info yet was found.
44b2264f
S
1298 if 'token' not in video_info:
1299 video_info = get_video_info
4e62ebe2 1300 break
c5e8d7af
PH
1301 if 'token' not in video_info:
1302 if 'reason' in video_info:
af214c3a
YCH
1303 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1304 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
678e436f 1305 if regions_allowed:
af214c3a
YCH
1306 raise ExtractorError('YouTube said: This video is available in %s only' % (
1307 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1308 expected=True)
d11271dd 1309 raise ExtractorError(
78caa52a 1310 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1311 expected=True, video_id=video_id)
c5e8d7af 1312 else:
d11271dd 1313 raise ExtractorError(
78caa52a 1314 '"token" parameter not in video info for unknown reason',
d11271dd 1315 video_id=video_id)
c5e8d7af 1316
cf7e015f
S
1317 # title
1318 if 'title' in video_info:
1319 video_title = video_info['title'][0]
1320 else:
1321 self._downloader.report_warning('Unable to extract video title')
1322 video_title = '_'
1323
1324 # description
1325 video_description = get_element_by_id("eow-description", video_webpage)
1326 if video_description:
1327 video_description = re.sub(r'''(?x)
1328 <a\s+
25cb7a0e 1329 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1330 (?:title|href)="([^"]+)"\s+
25cb7a0e 1331 (?:[a-zA-Z-]+="[^"]*"\s+)*?
096b5339 1332 class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
23f13e97 1333 [^<]+\.{3}\s*
cf7e015f
S
1334 </a>
1335 ''', r'\1', video_description)
1336 video_description = clean_html(video_description)
1337 else:
1338 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1339 if fd_mobj:
1340 video_description = unescapeHTML(fd_mobj.group(1))
1341 else:
1342 video_description = ''
1343
5e1eddb9
S
1344 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1345 if not self._downloader.params.get('noplaylist'):
1346 entries = []
1347 feed_ids = []
6863631c 1348 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1349 for feed in multifeed_metadata_list.split(','):
6863631c
S
1350 # Unquote should take place before split on comma (,) since textual
1351 # fields may contain comma as well (see
1352 # https://github.com/rg3/youtube-dl/issues/8536)
1353 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1354 entries.append({
1355 '_type': 'url_transparent',
1356 'ie_key': 'Youtube',
1357 'url': smuggle_url(
1358 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1359 {'force_singlefeed': True}),
1360 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1361 })
1362 feed_ids.append(feed_data['id'][0])
1363 self.to_screen(
1364 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1365 % (', '.join(feed_ids), video_id))
1366 return self.playlist_result(entries, video_id, video_title, video_description)
1367 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1368
1d699755
PH
1369 if 'view_count' in video_info:
1370 view_count = int(video_info['view_count'][0])
1371 else:
1372 view_count = None
1373
c5e8d7af
PH
1374 # Check for "rental" videos
1375 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
69ea8ca4 1376 raise ExtractorError('"rental" videos not supported')
c5e8d7af
PH
1377
1378 # Start extracting information
1379 self.report_information_extraction(video_id)
1380
1381 # uploader
1382 if 'author' not in video_info:
69ea8ca4 1383 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1384 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1385
1386 # uploader_id
1387 video_uploader_id = None
fd050249
S
1388 video_uploader_url = None
1389 mobj = re.search(
1390 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1391 video_webpage)
c5e8d7af 1392 if mobj is not None:
fd050249
S
1393 video_uploader_id = mobj.group('uploader_id')
1394 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1395 else:
69ea8ca4 1396 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1397
c5e8d7af 1398 # thumbnail image
7763b04e
JMF
1399 # We try first to get a high quality image:
1400 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1401 video_webpage, re.DOTALL)
1402 if m_thumb is not None:
1403 video_thumbnail = m_thumb.group(1)
1404 elif 'thumbnail_url' not in video_info:
69ea8ca4 1405 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1406 video_thumbnail = None
c5e8d7af 1407 else: # don't panic if we can't find it
7fd002c0 1408 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1409
1410 # upload date
9d0b581f
S
1411 upload_date = self._html_search_meta(
1412 'datePublished', video_webpage, 'upload date', default=None)
1413 if not upload_date:
1414 upload_date = self._search_regex(
1415 [r'(?s)id="eow-date.*?>(.*?)</span>',
1416 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1417 video_webpage, 'upload date', default=None)
1418 if upload_date:
1419 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1420 upload_date = unified_strdate(upload_date)
c5e8d7af 1421
7caf9830
S
1422 video_license = self._html_search_regex(
1423 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1424 video_webpage, 'license', default=None)
1425
0cb58b02
S
1426 m_music = re.search(
1427 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1428 video_webpage)
1429 if m_music:
1430 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1431 video_creator = clean_html(m_music.group('creator'))
1432 else:
1433 video_alt_title = video_creator = None
1434
55f7bd2d
PH
1435 m_cat_container = self._search_regex(
1436 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1437 video_webpage, 'categories', default=None)
ec8deefc 1438 if m_cat_container:
ad3bc6ac 1439 category = self._html_search_regex(
01ed5c9b 1440 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1441 default=None)
1442 video_categories = None if category is None else [category]
1443 else:
1444 video_categories = None
ec8deefc 1445
000b6b5a
S
1446 video_tags = [
1447 unescapeHTML(m.group('content'))
1448 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1449
f30a38be 1450 def _extract_count(count_name):
c93d53f5
S
1451 return str_to_int(self._search_regex(
1452 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1453 % re.escape(count_name),
1454 video_webpage, count_name, default=None))
1455
69ea8ca4
PH
1456 like_count = _extract_count('like')
1457 dislike_count = _extract_count('dislike')
336c3a69 1458
c5e8d7af 1459 # subtitles
d82134c3 1460 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1461 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af
PH
1462
1463 if 'length_seconds' not in video_info:
69ea8ca4 1464 self._downloader.report_warning('unable to extract video duration')
b466b702 1465 video_duration = None
c5e8d7af 1466 else:
7fd002c0 1467 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 1468
1fb07d10
JG
1469 # annotations
1470 video_annotations = None
1471 if self._downloader.params.get('writeannotations', False):
5f6a1245 1472 video_annotations = self._extract_annotations(video_id)
1fb07d10 1473
dd27fd17
PH
1474 def _map_to_format_list(urlmap):
1475 formats = []
1476 for itag, video_real_url in urlmap.items():
1477 dct = {
1478 'format_id': itag,
1479 'url': video_real_url,
1480 'player_url': player_url,
1481 }
0b65e5d4
PH
1482 if itag in self._formats:
1483 dct.update(self._formats[itag])
dd27fd17
PH
1484 formats.append(dct)
1485 return formats
1486
c5e8d7af
PH
1487 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1488 self.report_rtmp_download()
dd27fd17
PH
1489 formats = [{
1490 'format_id': '_rtmp',
1491 'protocol': 'rtmp',
1492 'url': video_info['conn'][0],
1493 'player_url': player_url,
1494 }]
24270b03 1495 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1496 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1497 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1498 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1499 formats_spec = {}
82156fdb 1500 fmt_list = video_info.get('fmt_list', [''])[0]
1501 if fmt_list:
1502 for fmt in fmt_list.split(','):
1503 spec = fmt.split('/')
3318832e 1504 if len(spec) > 1:
1505 width_height = spec[1].split('x')
1506 if len(width_height) == 2:
1507 formats_spec[spec[0]] = {
1508 'resolution': spec[1],
1509 'width': int_or_none(width_height[0]),
1510 'height': int_or_none(width_height[1]),
1511 }
c9afb51c 1512 formats = []
00fe14fc 1513 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1514 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1515 if 'itag' not in url_data or 'url' not in url_data:
1516 continue
1517 format_id = url_data['itag'][0]
1518 url = url_data['url'][0]
1519
1520 if 'sig' in url_data:
1521 url += '&signature=' + url_data['sig'][0]
1522 elif 's' in url_data:
1523 encrypted_sig = url_data['s'][0]
6449cd80 1524 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
201e9eaa 1525
beb95e77 1526 jsplayer_url_json = self._search_regex(
6449cd80
PH
1527 ASSETS_RE,
1528 embed_webpage if age_gate else video_webpage,
1529 'JS player URL (1)', default=None)
1530 if not jsplayer_url_json and not age_gate:
1531 # We need the embed website after all
1532 if embed_webpage is None:
1533 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1534 embed_webpage = self._download_webpage(
1535 embed_url, video_id, 'Downloading embed webpage')
1536 jsplayer_url_json = self._search_regex(
1537 ASSETS_RE, embed_webpage, 'JS player URL')
1538
beb95e77 1539 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1540 if player_url is None:
1541 player_url_json = self._search_regex(
1542 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1543 video_webpage, 'age gate player URL')
201e9eaa
PH
1544 player_url = json.loads(player_url_json)
1545
1546 if self._downloader.params.get('verbose'):
cf010131 1547 if player_url is None:
201e9eaa
PH
1548 player_version = 'unknown'
1549 player_desc = 'unknown'
1550 else:
1551 if player_url.endswith('swf'):
1552 player_version = self._search_regex(
1553 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1554 'flash player', fatal=False)
201e9eaa 1555 player_desc = 'flash player %s' % player_version
cf010131 1556 else:
201e9eaa 1557 player_version = self._search_regex(
50f84a9a 1558 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
201e9eaa
PH
1559 player_url,
1560 'html5 player', fatal=False)
78caa52a 1561 player_desc = 'html5 player %s' % player_version
201e9eaa 1562
60064c53 1563 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1564 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1565 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1566
1567 signature = self._decrypt_signature(
1568 encrypted_sig, video_id, player_url, age_gate)
1569 url += '&signature=' + signature
1570 if 'ratebypass' not in url:
1571 url += '&ratebypass=yes'
c9afb51c 1572
94278f72
YCH
1573 dct = {
1574 'format_id': format_id,
1575 'url': url,
1576 'player_url': player_url,
1577 }
1578 if format_id in self._formats:
1579 dct.update(self._formats[format_id])
3318832e 1580 if format_id in formats_spec:
1581 dct.update(formats_spec[format_id])
94278f72 1582
aabc2be6
S
1583 # Some itags are not included in DASH manifest thus corresponding formats will
1584 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1585 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1586 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1587 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1588
1589 more_fields = {
c9afb51c 1590 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1591 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1592 'width': width,
1593 'height': height,
1594 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1595 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1596 }
94278f72
YCH
1597 for key, value in more_fields.items():
1598 if value:
1599 dct[key] = value
aabc2be6
S
1600 type_ = url_data.get('type', [None])[0]
1601 if type_:
1602 type_split = type_.split(';')
1603 kind_ext = type_split[0].split('/')
1604 if len(kind_ext) == 2:
94278f72
YCH
1605 kind, _ = kind_ext
1606 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1607 if kind in ('audio', 'video'):
1608 codecs = None
1609 for mobj in re.finditer(
1610 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1611 if mobj.group('key') == 'codecs':
1612 codecs = mobj.group('val')
1613 break
1614 if codecs:
1615 codecs = codecs.split(',')
1616 if len(codecs) == 2:
cc28492d 1617 acodec, vcodec = codecs[1], codecs[0]
aabc2be6
S
1618 else:
1619 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1620 dct.update({
1621 'acodec': acodec,
1622 'vcodec': vcodec,
1623 })
aabc2be6 1624 formats.append(dct)
1d043b93
JMF
1625 elif video_info.get('hlsvp'):
1626 manifest_url = video_info['hlsvp'][0]
1627 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 1628 formats = _map_to_format_list(url_map)
ac5a69af
YCH
1629 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1630 for a_format in formats:
049d71d8 1631 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
c5e8d7af 1632 else:
8ceabd4d
S
1633 unavailable_message = self._html_search_regex(
1634 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1635 video_webpage, 'unavailable message', default=None)
1636 if unavailable_message:
1637 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1638 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1639
dd27fd17 1640 # Look for the DASH manifest
203fb43f 1641 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1642 dash_mpd_fatal = True
8ff648e4 1643 for mpd_url in dash_mpds:
d8d24a92 1644 dash_formats = {}
774e208f 1645 try:
05d0d131
YCH
1646 def decrypt_sig(mobj):
1647 s = mobj.group(1)
1648 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1649 return '/signature/%s' % dec_s
1650
8ff648e4 1651 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1652
8ff648e4 1653 for df in self._extract_mpd_formats(
1654 mpd_url, video_id, fatal=dash_mpd_fatal,
1655 formats_dict=self._formats):
d8d24a92
S
1656 # Do not overwrite DASH format found in some previous DASH manifest
1657 if df['format_id'] not in dash_formats:
1658 dash_formats[df['format_id']] = df
77c6fb5b
S
1659 # Additional DASH manifests may end up in HTTP Error 403 therefore
1660 # allow them to fail without bug report message if we already have
1661 # some DASH manifest succeeded. This is temporary workaround to reduce
1662 # burst of bug reports until we figure out the reason and whether it
1663 # can be fixed at all.
1664 dash_mpd_fatal = False
774e208f
PH
1665 except (ExtractorError, KeyError) as e:
1666 self.report_warning(
1667 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1668 if dash_formats:
04b3b3df
JMF
1669 # Remove the formats we found through non-DASH, they
1670 # contain less info and it can be wrong, because we use
1671 # fixed values (for example the resolution). See
1672 # https://github.com/rg3/youtube-dl/issues/5774 for an
1673 # example.
d80265cc 1674 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1675 formats.extend(dash_formats.values())
d80044c2 1676
6271f1ca
PH
1677 # Check for malformed aspect ratio
1678 stretched_m = re.search(
1679 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1680 video_webpage)
1681 if stretched_m:
313dfc45
LL
1682 w = float(stretched_m.group('w'))
1683 h = float(stretched_m.group('h'))
5faf9fed
S
1684 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1685 # We will only process correct ratios.
313dfc45 1686 if w > 0 and h > 0:
41f24c32 1687 ratio = w / h
313dfc45
LL
1688 for f in formats:
1689 if f.get('vcodec') != 'none':
1690 f['stretched_ratio'] = ratio
6271f1ca 1691
4bcc7bd1 1692 self._sort_formats(formats)
4ea3be0a 1693
d77ab8e2
S
1694 self.mark_watched(video_id, video_info)
1695
4ea3be0a 1696 return {
8bcc8756
JW
1697 'id': video_id,
1698 'uploader': video_uploader,
1699 'uploader_id': video_uploader_id,
fd050249 1700 'uploader_url': video_uploader_url,
8bcc8756 1701 'upload_date': upload_date,
7caf9830 1702 'license': video_license,
0cb58b02 1703 'creator': video_creator,
8bcc8756 1704 'title': video_title,
0cb58b02 1705 'alt_title': video_alt_title,
8bcc8756
JW
1706 'thumbnail': video_thumbnail,
1707 'description': video_description,
1708 'categories': video_categories,
000b6b5a 1709 'tags': video_tags,
8bcc8756 1710 'subtitles': video_subtitles,
360e1ca5 1711 'automatic_captions': automatic_captions,
8bcc8756
JW
1712 'duration': video_duration,
1713 'age_limit': 18 if age_gate else 0,
1714 'annotations': video_annotations,
7e8c0af0 1715 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1716 'view_count': view_count,
4ea3be0a 1717 'like_count': like_count,
1718 'dislike_count': dislike_count,
2d30521a 1719 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1720 'formats': formats,
2fe1ff85 1721 'is_live': is_live,
7c80519c 1722 'start_time': start_time,
297a564b 1723 'end_time': end_time,
4ea3be0a 1724 }
c5e8d7af 1725
5f6a1245 1726
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, mixes and bare playlist ids."""
    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the id found in a playlist-style URL, group 2 captures
    # a bare playlist id given without any URL around it.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?[&;])*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Key was previously misspelled ('playlist_mincout'), so the minimum
        # count of this test case was never checked.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before extraction."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Return a playlist result for a mix.

        Watch pages are downloaded repeatedly, each time starting from the
        last video id collected so far, until a page yields no unseen ids.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # The title is read from the last downloaded page; several class
        # names are tried in order and the first non-empty hit wins.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return a playlist result.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or the parameters are invalid.
        Any other alert, except the language chooser, is reported as a warning.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Return a single-video result when --no-playlist applies.

        Returns None (implicitly) when the URL has no 'v' query parameter or
        when the whole playlist should be downloaded.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    def _real_extract(self, url):
        """Dispatch to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1910
c5e8d7af 1911
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos listed on a YouTube channel page."""
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the channel's videos, preferably via its uploads playlist."""
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items,
        # i.e. 1050 videos total (see #5778). As a workaround, extract as a
        # playlist whenever the channel playlist id can be obtained, and only
        # otherwise fall back on the page by page channel extraction.
        listing_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        external_id = False
        if listing_page is not False:
            external_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not external_id:
                external_id = self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    listing_page, 'channel id', default=None)
        if external_id and external_id.startswith('UC'):
            uploads_id = 'UU' + external_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(
            videos_url, channel_id, 'Downloading page #1')
        marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if marker is None:
            return self.playlist_result(
                self._entries(first_page, channel_id), channel_id)

        # Autogenerated channel: the videos are contained in a single page,
        # the ajax pages can't be used because they are empty.
        entries = []
        for video_id, video_title in self.extract_videos_from_page(first_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)
c5e8d7af
PH
1985
1986
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for a user's videos, given a user URL or 'ytuser:' keyword."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only if no other Youtube*IE in this module does."""
        # The regex of this extractor is too permissive and would match URLs
        # that belong to other youtube extractors, so those get priority.
        for name, candidate in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if candidate is cls:
                continue
            if candidate.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
f4b05232 2013
b05654f0 2014
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the '/live' page of a user or channel."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            # Raw string: '\.' is not a valid escape in a normal literal and
            # triggers a warning on recent Python 3; the value is unchanged.
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the video named by the live page, else to the base URL.

        The page download is non-fatal. If the page has og:type 'video' and a
        'videoId' meta value that looks like an 11-character video id, the
        result points at that video. Otherwise it points at the user/channel
        URL with the '/live' suffix removed.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default=None)
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2058
2059
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the '/playlists' page of a user or channel."""
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
0c148415
S
2088
2089
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for 'ytsearch' keyword searches."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query parameters merged into every results request; subclasses
    # override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page brings no
        videos or at least n videos have been collected. The returned
        playlist holds at most n entries.

        Raises ExtractorError (expected) when a downloaded page contains a
        search message instead of results.
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough videos are collected. Using '>' here
            # requested one page too many when exactly n videos were found,
            # and that extra page could fail the whole search.
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2133
c9ae7b95 2134
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the keyword search that lists the newest videos first."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Added to every results request built by the parent class.
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
75dff0ee 2140
c9ae7b95 2141
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube search result page URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the videos of the results page, titled with the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # The decoded query doubles as the download label and playlist title.
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2163
2164
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for show pages, handled through their '/playlists' page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Run the parent extraction on the show's '/playlists' URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2182
2183
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extraction."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is scanned for watch links, then further portions are
        requested through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # A real list is required here: on Python 3 filter() returns an
            # iterator that is always truthy, so the emptiness check below
            # would never stop the loop.
            new_ids = [video_id for video_id in orderedSet(matches) if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2231
2232
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list (playlist id 'WL')."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Use the single-video result when the helper yields one;
        # otherwise fall back to extracting the whole 'WL' playlist.
        return (
            self._check_download_just_video(url, 'WL')
            or self._extract_playlist('WL'))
f459d170 2251
5f6a1245 2252
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its backing playlist."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        # `url` is ignored; the favourites page is always fetched from the
        # fixed address and the playlist id is read from a "list=" parameter
        # found in its markup.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
2263
2264
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    # Feed settings read by the YoutubeFeedsInfoExtractor base class
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2270
1ed5b5c9 2271
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    # Feed settings read by the YoutubeFeedsInfoExtractor base class
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2277
1ed5b5c9 2278
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like the sibling extractors: '\.' in a plain string literal
    # is an invalid escape sequence that newer Python versions warn about.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1ed5b5c9
JMF
2284
2285
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that carry no video id and reports why.

    Extraction always fails with an expected error that suggests quoting
    the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from such a URL; explain the likely cause.
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)
772fd5cc
PH
2333
2334
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose `v` value is 1-10 id characters long and reports it."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always raises: the matched id is too short to be extracted.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)