]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[sbs] improve extraction(fixes #3811)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
c5e8d7af 19 compat_parse_qs,
c5e8d7af 20 compat_urllib_parse,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
c5e8d7af 25 compat_str,
4bb4a188
PH
26)
27from ..utils import (
c5e8d7af 28 clean_html,
515fc877 29 encode_dict,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
7c80519c 38 parse_duration,
0cb58b02 39 remove_quotes,
041bc3ad 40 remove_start,
5c2266df 41 sanitized_Request,
cf7e015f 42 smuggle_url,
c93d53f5 43 str_to_int,
c5e8d7af
PH
44 unescapeHTML,
45 unified_strdate,
cf7e015f 46 unsmuggle_url,
81c2f20b 47 uppercase_escape,
af214c3a 48 ISO3166Utils,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video ID in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure explicitly, as documented above, instead of
            # falling through with an implicit None.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')

        req = sanitized_Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        # A form with id="challenge" in the response means a second factor is requested
        if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Drop a leading 'G-' from the code before submitting it
            tfa_code = remove_start(tfa_code, 'G-')

            # Start from the challenge form's hidden inputs and add the code
            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Still seeing the challenge form: the submitted code was rejected
            if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # The login form being shown again is treated as rejected credentials
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached. A failed login is not
        raised from here; _login() reports it through warnings.
        """
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 183
8377574c 184
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every "Load more" continuation.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched from the data-uix-load-more-href link found in the most
        recent load-more widget HTML. Iteration ends when no such link is
        present or when a continuation has blank content.
        """
        load_more_regex = r'data-uix-load-more-href="/?(?P<more>[^"]+)"'
        current_html = widget_html = page
        page_index = 0
        while True:
            page_index += 1
            for item in self._process_page(current_html):
                yield item

            load_more = re.search(load_more_regex, widget_html)
            if load_more is None:
                return

            next_url = 'https://youtube.com/%s' % load_more.group('more')
            progress_note = 'Downloading page #%s' % page_index
            response = self._download_json(
                next_url, playlist_id, progress_note,
                transform_source=uppercase_escape)
            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
207
061a75ed
S
208
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every (id, title) pair found in content."""
        for pair in self.extract_videos_from_page(content):
            found_id, found_title = pair
            yield self.url_result(found_id, 'Youtube', found_id, found_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs matched by self._VIDEO_RE in page.

        IDs keep the order of their first occurrence. When an ID is matched
        more than once, the first non-empty title seen for it is kept.
        """
        ordered_ids = []
        titles = []
        # Maps each video ID to its position in the two lists above
        slot_by_id = {}
        for match in re.finditer(self._VIDEO_RE, page):
            candidate_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index', with '0' - confirm intent
            if candidate_id == '0' and 'index' in match.groupdict():
                continue
            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            slot = slot_by_id.get(candidate_id)
            if slot is None:
                slot_by_id[candidate_id] = len(ordered_ids)
                ordered_ids.append(candidate_id)
                titles.append(title)
            elif title and not titles[slot]:
                titles[slot] = title
        return zip(ordered_ids, titles)
233
234
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist link in content."""
        found_ids = re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)
        for list_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries.

        The title is taken from the page with _og_search_title(fatal=False).
        """
        list_id = self._match_id(url)
        html = self._download_webpage(url, list_id)
        page_title = self._og_search_title(html, fatal=False)
        entry_iter = self._entries(html, list_id)
        return self.playlist_result(entry_iter, list_id, page_title)
0c148415
S
246
247
360e1ca5 248class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 249 IE_DESC = 'YouTube.com'
cb7dfeea 250 _VALID_URL = r"""(?x)^
c5e8d7af 251 (
edb53e2d 252 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 253 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 254 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 255 (?:www\.)?pwnyoutube\.com/|
f7000f3a 256 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
257 tube\.majestyc\.net/|
258 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
259 (?:.*?\#/)? # handle anchor (#/) redirect urls
260 (?: # the various things that can precede the ID:
ac7553d0 261 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 262 |(?: # or the v= param in all its forms
f7000f3a 263 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 264 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 265 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
266 v=
267 )
f4b05232 268 ))
cbaed4bb
S
269 |(?:
270 youtu\.be| # just youtu.be/xxxx
271 vid\.plus # or vid.plus/xxxx
272 )/
edb53e2d 273 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 274 )
c5e8d7af 275 )? # all until now is optional -> you can pass the naked ID
8963d9c2 276 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
9291475f 277 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
278 (?(1).+)? # if we found the ID, everything can follow
279 $"""
c5e8d7af 280 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 281 _formats = {
c2d3cb4c 282 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
283 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
284 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
285 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
286 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
287 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
288 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
289 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 290 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 291 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
292 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
293 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
294 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
295 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
296 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 297 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 298 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
299 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 300
301
302 # 3D videos
c2d3cb4c 303 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
304 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
305 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
306 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 307 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
308 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
309 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 310
96fb5605 311 # Apple HTTP Live Streaming
c2d3cb4c 312 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
313 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
314 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
315 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
316 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 317 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
318 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
319
320 # DASH mp4 video
c2d3cb4c 321 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
322 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
323 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
324 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
325 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
326 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
327 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
a6c2c244
YCH
328 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
329 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
330 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
331 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
836a086c 332
f6f1fc92 333 # Dash mp4 audio
c2d3cb4c 334 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
335 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
336 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
337
338 # Dash webm
a6c2c244
YCH
339 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
340 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
341 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
342 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
343 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
344 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
345 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
346 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
347 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
348 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
349 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
350 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
351 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
352 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
353 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
4c6b4764 354 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
a6c2c244
YCH
355 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
356 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
357 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
358 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
359 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
360 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
2c62dc26
PH
361
362 # Dash webm audio
a6c2c244
YCH
363 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
364 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 365
0857baad 366 # Dash webm audio with opus inside
a6c2c244
YCH
367 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
368 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
369 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
0857baad 370
ce6b9a2d
PH
371 # RTMP (unnamed)
372 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 373 }
23d17e4b 374 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 375
78caa52a 376 IE_NAME = 'youtube'
2eb88d95
PH
377 _TESTS = [
378 {
b67d6314 379 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
380 'info_dict': {
381 'id': 'BaW_jenozKc',
382 'ext': 'mp4',
383 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
384 'uploader': 'Philipp Hagemeister',
385 'uploader_id': 'phihag',
fd050249 386 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 387 'upload_date': '20121002',
7caf9830 388 'license': 'Standard YouTube License',
4bc3a23e
PH
389 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
390 'categories': ['Science & Technology'],
000b6b5a 391 'tags': ['youtube-dl'],
3e7c1224
PH
392 'like_count': int,
393 'dislike_count': int,
7c80519c 394 'start_time': 1,
297a564b 395 'end_time': 9,
2eb88d95 396 }
0e853ca4 397 },
0e853ca4 398 {
4bc3a23e
PH
399 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
400 'note': 'Test generic use_cipher_signature video (#897)',
401 'info_dict': {
402 'id': 'UxxajLWwzqY',
403 'ext': 'mp4',
404 'upload_date': '20120506',
405 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 406 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 407 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
408 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
409 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
410 'iconic ep', 'iconic', 'love', 'it'],
4bc3a23e
PH
411 'uploader': 'Icona Pop',
412 'uploader_id': 'IconaPop',
fd050249 413 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 414 'license': 'Standard YouTube License',
0cb58b02 415 'creator': 'Icona Pop',
2eb88d95 416 }
c108eb73
JMF
417 },
418 {
4bc3a23e
PH
419 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
420 'note': 'Test VEVO video with age protection (#956)',
421 'info_dict': {
422 'id': '07FYdnEawAQ',
423 'ext': 'mp4',
424 'upload_date': '20130703',
425 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 426 'alt_title': 'Tunnel Vision',
4bc3a23e
PH
427 'description': 'md5:64249768eec3bc4276236606ea996373',
428 'uploader': 'justintimberlakeVEVO',
429 'uploader_id': 'justintimberlakeVEVO',
fd050249 430 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 431 'license': 'Standard YouTube License',
0cb58b02 432 'creator': 'Justin Timberlake',
34952f09 433 'age_limit': 18,
c108eb73
JMF
434 }
435 },
fccd3771 436 {
4bc3a23e
PH
437 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
438 'note': 'Embed-only video (#1746)',
439 'info_dict': {
440 'id': 'yZIXLfi8CZQ',
441 'ext': 'mp4',
442 'upload_date': '20120608',
443 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
444 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
445 'uploader': 'SET India',
94bfcd23 446 'uploader_id': 'setindia',
fd050249 447 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 448 'license': 'Standard YouTube License',
94bfcd23 449 'age_limit': 18,
fccd3771
PH
450 }
451 },
11b56058 452 {
b67d6314 453 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
454 'note': 'Use the first video ID in the URL',
455 'info_dict': {
456 'id': 'BaW_jenozKc',
457 'ext': 'mp4',
458 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
459 'uploader': 'Philipp Hagemeister',
460 'uploader_id': 'phihag',
fd050249 461 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 462 'upload_date': '20121002',
7caf9830 463 'license': 'Standard YouTube License',
11b56058
PM
464 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
465 'categories': ['Science & Technology'],
466 'tags': ['youtube-dl'],
467 'like_count': int,
468 'dislike_count': int,
34a7de29
S
469 },
470 'params': {
471 'skip_download': True,
472 },
11b56058 473 },
dd27fd17 474 {
4bc3a23e
PH
475 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
476 'note': '256k DASH audio (format 141) via DASH manifest',
477 'info_dict': {
478 'id': 'a9LDPn-MO4I',
479 'ext': 'm4a',
480 'upload_date': '20121002',
481 'uploader_id': '8KVIDEO',
fd050249 482 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
483 'description': '',
484 'uploader': '8KVIDEO',
7caf9830 485 'license': 'Standard YouTube License',
4bc3a23e 486 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 487 },
4bc3a23e
PH
488 'params': {
489 'youtube_include_dash_manifest': True,
490 'format': '141',
4919603f 491 },
dd27fd17 492 },
3489b7d2
JMF
493 # DASH manifest with encrypted signature
494 {
78caa52a
PH
495 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
496 'info_dict': {
497 'id': 'IB3lcPjvWLA',
498 'ext': 'm4a',
b766eb27
JMF
499 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
500 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
78caa52a
PH
501 'uploader': 'AfrojackVEVO',
502 'uploader_id': 'AfrojackVEVO',
503 'upload_date': '20131011',
7caf9830 504 'license': 'Standard YouTube License',
3489b7d2 505 },
4bc3a23e 506 'params': {
78caa52a
PH
507 'youtube_include_dash_manifest': True,
508 'format': '141',
3489b7d2
JMF
509 },
510 },
aaeb86f6
S
511 # JS player signature function name containing $
512 {
513 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
514 'info_dict': {
515 'id': 'nfWlot6h_JM',
516 'ext': 'm4a',
517 'title': 'Taylor Swift - Shake It Off',
0cb58b02 518 'alt_title': 'Shake It Off',
f57b7835 519 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
aaeb86f6
S
520 'uploader': 'TaylorSwiftVEVO',
521 'uploader_id': 'TaylorSwiftVEVO',
522 'upload_date': '20140818',
7caf9830 523 'license': 'Standard YouTube License',
0cb58b02 524 'creator': 'Taylor Swift',
aaeb86f6
S
525 },
526 'params': {
527 'youtube_include_dash_manifest': True,
528 'format': '141',
529 },
530 },
aa79ac0c
PH
531 # Controversy video
532 {
533 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
534 'info_dict': {
535 'id': 'T4XJQO3qol8',
536 'ext': 'mp4',
537 'upload_date': '20100909',
538 'uploader': 'The Amazing Atheist',
539 'uploader_id': 'TheAmazingAtheist',
fd050249 540 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 541 'license': 'Standard YouTube License',
aa79ac0c
PH
542 'title': 'Burning Everyone\'s Koran',
543 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
544 }
c522adb1
JMF
545 },
546 # Normal age-gate video (No vevo, embed allowed)
547 {
548 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
549 'info_dict': {
550 'id': 'HtVdAasjOgU',
551 'ext': 'mp4',
552 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
9ed99402 553 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
c522adb1
JMF
554 'uploader': 'The Witcher',
555 'uploader_id': 'WitcherGame',
fd050249 556 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 557 'upload_date': '20140605',
7caf9830 558 'license': 'Standard YouTube License',
34952f09 559 'age_limit': 18,
c522adb1
JMF
560 },
561 },
fccae2b9
S
562 # Age-gate video with encrypted signature
563 {
564 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
565 'info_dict': {
566 'id': '6kLq3WMV1nU',
567 'ext': 'mp4',
568 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
569 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
570 'uploader': 'LloydVEVO',
571 'uploader_id': 'LloydVEVO',
fd050249 572 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 573 'upload_date': '20110629',
7caf9830 574 'license': 'Standard YouTube License',
34952f09 575 'age_limit': 18,
fccae2b9
S
576 },
577 },
774e208f
PH
578 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
579 {
580 'url': '__2ABJjxzNo',
581 'info_dict': {
582 'id': '__2ABJjxzNo',
583 'ext': 'mp4',
584 'upload_date': '20100430',
585 'uploader_id': 'deadmau5',
fd050249 586 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 587 'creator': 'deadmau5',
774e208f
PH
588 'description': 'md5:12c56784b8032162bb936a5f76d55360',
589 'uploader': 'deadmau5',
7caf9830 590 'license': 'Standard YouTube License',
774e208f 591 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 592 'alt_title': 'Some Chords',
774e208f
PH
593 },
594 'expected_warnings': [
595 'DASH manifest missing',
596 ]
e52a40ab
PH
597 },
598 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
599 {
600 'url': 'lqQg6PlCWgI',
601 'info_dict': {
602 'id': 'lqQg6PlCWgI',
603 'ext': 'mp4',
90227264 604 'upload_date': '20150827',
cbe2bd91 605 'uploader_id': 'olympic',
fd050249 606 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 607 'license': 'Standard YouTube License',
cbe2bd91
PH
608 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
609 'uploader': 'Olympics',
610 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
611 },
612 'params': {
613 'skip_download': 'requires avconv',
e52a40ab 614 }
cbe2bd91 615 },
6271f1ca
PH
616 # Non-square pixels
617 {
618 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
619 'info_dict': {
620 'id': '_b-2C3KPAM0',
621 'ext': 'mp4',
622 'stretched_ratio': 16 / 9.,
623 'upload_date': '20110310',
624 'uploader_id': 'AllenMeow',
fd050249 625 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
626 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
627 'uploader': '孫艾倫',
7caf9830 628 'license': 'Standard YouTube License',
6271f1ca
PH
629 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
630 },
06b491eb
S
631 },
632 # url_encoded_fmt_stream_map is empty string
633 {
634 'url': 'qEJwOuvDf7I',
635 'info_dict': {
636 'id': 'qEJwOuvDf7I',
f57b7835 637 'ext': 'webm',
06b491eb
S
638 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
639 'description': '',
640 'upload_date': '20150404',
641 'uploader_id': 'spbelect',
642 'uploader': 'Наблюдатели Петербурга',
643 },
644 'params': {
645 'skip_download': 'requires avconv',
e323cf3f
S
646 },
647 'skip': 'This live event has ended.',
06b491eb 648 },
da77d856
S
649 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
650 {
651 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
652 'info_dict': {
653 'id': 'FIl7x6_3R5Y',
654 'ext': 'mp4',
655 'title': 'md5:7b81415841e02ecd4313668cde88737a',
656 'description': 'md5:116377fd2963b81ec4ce64b542173306',
657 'upload_date': '20150625',
658 'uploader_id': 'dorappi2000',
fd050249 659 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 660 'uploader': 'dorappi2000',
7caf9830 661 'license': 'Standard YouTube License',
da77d856
S
662 'formats': 'mincount:33',
663 },
2ee8f5d8 664 },
8a1a26ce
YCH
665 # DASH manifest with segment_list
666 {
667 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
668 'md5': '8ce563a1d667b599d21064e982ab9e31',
669 'info_dict': {
670 'id': 'CsmdDsKjzN8',
671 'ext': 'mp4',
17ee98e1 672 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
673 'uploader': 'Airtek',
674 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
675 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 676 'license': 'Standard YouTube License',
8a1a26ce
YCH
677 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
678 },
679 'params': {
680 'youtube_include_dash_manifest': True,
681 'format': '135', # bestvideo
682 }
2ee8f5d8 683 },
cf7e015f
S
684 {
685 # Multifeed videos (multiple cameras), URL is for Main Camera
686 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
687 'info_dict': {
688 'id': 'jqWvoWXjCVs',
689 'title': 'teamPGP: Rocket League Noob Stream',
690 'description': 'md5:dc7872fb300e143831327f1bae3af010',
691 },
692 'playlist': [{
693 'info_dict': {
694 'id': 'jqWvoWXjCVs',
695 'ext': 'mp4',
696 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
697 'description': 'md5:dc7872fb300e143831327f1bae3af010',
698 'upload_date': '20150721',
699 'uploader': 'Beer Games Beer',
700 'uploader_id': 'beergamesbeer',
fd050249 701 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 702 'license': 'Standard YouTube License',
cf7e015f
S
703 },
704 }, {
705 'info_dict': {
706 'id': '6h8e8xoXJzg',
707 'ext': 'mp4',
708 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
709 'description': 'md5:dc7872fb300e143831327f1bae3af010',
710 'upload_date': '20150721',
711 'uploader': 'Beer Games Beer',
712 'uploader_id': 'beergamesbeer',
fd050249 713 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 714 'license': 'Standard YouTube License',
cf7e015f
S
715 },
716 }, {
717 'info_dict': {
718 'id': 'PUOgX5z9xZw',
719 'ext': 'mp4',
720 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
721 'description': 'md5:dc7872fb300e143831327f1bae3af010',
722 'upload_date': '20150721',
723 'uploader': 'Beer Games Beer',
724 'uploader_id': 'beergamesbeer',
fd050249 725 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 726 'license': 'Standard YouTube License',
cf7e015f
S
727 },
728 }, {
729 'info_dict': {
730 'id': 'teuwxikvS5k',
731 'ext': 'mp4',
732 'title': 'teamPGP: Rocket League Noob Stream (zim)',
733 'description': 'md5:dc7872fb300e143831327f1bae3af010',
734 'upload_date': '20150721',
735 'uploader': 'Beer Games Beer',
736 'uploader_id': 'beergamesbeer',
fd050249 737 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 738 'license': 'Standard YouTube License',
cf7e015f
S
739 },
740 }],
741 'params': {
742 'skip_download': True,
743 },
cbaed4bb 744 },
f9f49d87
S
745 {
746 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
747 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
748 'info_dict': {
749 'id': 'gVfLd0zydlo',
750 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
751 },
752 'playlist_count': 2,
753 },
cbaed4bb
S
754 {
755 'url': 'http://vid.plus/FlRa-iH7PGw',
756 'only_matching': True,
0e49d9a6
LL
757 },
758 {
61f92af1 759 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
760 # Also tests cut-off URL expansion in video description (see
761 # https://github.com/rg3/youtube-dl/issues/1892,
762 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
763 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
764 'info_dict': {
765 'id': 'lsguqyKfVQg',
766 'ext': 'mp4',
767 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 768 'alt_title': 'Dark Walk',
0e49d9a6
LL
769 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
770 'upload_date': '20151119',
771 'uploader_id': 'IronSoulElf',
fd050249 772 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 773 'uploader': 'IronSoulElf',
7caf9830 774 'license': 'Standard YouTube License',
0cb58b02 775 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
776 },
777 'params': {
778 'skip_download': True,
779 },
780 },
61f92af1
S
781 {
782 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
783 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
784 'only_matching': True,
785 },
313dfc45
LL
786 {
787 # Video with yt:stretch=17:0
788 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
789 'info_dict': {
790 'id': 'Q39EVAstoRM',
791 'ext': 'mp4',
792 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
793 'description': 'md5:ee18a25c350637c8faff806845bddee9',
794 'upload_date': '20151107',
795 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
796 'uploader': 'CH GAMER DROID',
797 },
798 'params': {
799 'skip_download': True,
800 },
801 },
7caf9830
S
802 {
803 # Video licensed under Creative Commons
804 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
805 'info_dict': {
806 'id': 'M4gD1WSo5mA',
807 'ext': 'mp4',
808 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
809 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
810 'upload_date': '20150127',
811 'uploader_id': 'BerkmanCenter',
fd050249 812 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
7caf9830
S
813 'uploader': 'BerkmanCenter',
814 'license': 'Creative Commons Attribution license (reuse allowed)',
815 },
816 'params': {
817 'skip_download': True,
818 },
819 },
fd050249
S
820 {
821 # Channel-like uploader_url
822 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
823 'info_dict': {
824 'id': 'eQcmzGIKrzg',
825 'ext': 'mp4',
826 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
827 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
828 'upload_date': '20151119',
829 'uploader': 'Bernie 2016',
830 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
831 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
832 'license': 'Creative Commons Attribution license (reuse allowed)',
833 },
834 'params': {
835 'skip_download': True,
836 },
837 },
040ac686
S
838 {
839 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
840 'only_matching': True,
841 }
2eb88d95
PH
842 ]
843
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature; keys are
    # (player URL, signature cache id) tuples, values are callables.
    self._player_cache = dict()
e0df6211 847
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a note that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 851
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a note that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
855
def report_unavailable_format(self, video_id, format):
    """Print a note that the requested format is not available for the video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
859
def report_rtmp_download(self):
    """Print a note that the download will go over the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 863
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the shape of a signature as a string.

    The signature is split on '.' and the length of every part is joined
    with '.' again, e.g. 'ab.cde' gives '2.3'.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
867
868 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 869 id_m = re.match(
50f84a9a 870 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
cf010131 871 player_url)
c081b35c
PH
872 if not id_m:
873 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
874 player_type = id_m.group('ext')
875 player_id = id_m.group('id')
876
c4417ddb 877 # Read from filesystem cache
60064c53
PH
878 func_id = '%s_%s_%s' % (
879 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 880 assert os.path.basename(func_id) == func_id
a0e07d31 881
69ea8ca4 882 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 883 if cache_spec is not None:
78caa52a 884 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 885
6d1a55a5
PH
886 download_note = (
887 'Downloading player %s' % player_url
888 if self._downloader.params.get('verbose') else
889 'Downloading %s player %s' % (player_type, player_id)
890 )
e0df6211
PH
891 if player_type == 'js':
892 code = self._download_webpage(
893 player_url, video_id,
6d1a55a5 894 note=download_note,
69ea8ca4 895 errnote='Download of %s failed' % player_url)
83799698 896 res = self._parse_sig_js(code)
c4417ddb 897 elif player_type == 'swf':
e0df6211
PH
898 urlh = self._request_webpage(
899 player_url, video_id,
6d1a55a5 900 note=download_note,
69ea8ca4 901 errnote='Download of %s failed' % player_url)
e0df6211 902 code = urlh.read()
83799698 903 res = self._parse_sig_swf(code)
e0df6211
PH
904 else:
905 assert False, 'Invalid player type %r' % player_type
906
785521bf
PH
907 test_string = ''.join(map(compat_chr, range(len(example_sig))))
908 cache_res = res(test_string)
909 cache_spec = [ord(c) for c in cache_res]
83799698 910
69ea8ca4 911 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
912 return res
913
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is applied to a probe string whose n-th character has code point
    n, so the code points of the result list the input index used at each
    output position. That index list is rendered as a sum of slices and
    single-character lookups on a string named s, guarded by a check on
    the lengths of the dot-separated parts of example_sig, and the
    resulting snippet is written with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # Nothing to emit for an empty index list.
        if not idxs:
            return
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i so that a single-element index list, for which the
        # loop below never runs, still yields its only lookup.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # The printed snippet is Python source, so the return line is indented.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 952
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The name of the signature function is located with a regex on jscode
    and the function is extracted with JSInterpreter. The returned callable
    takes the signature string and passes it as the sole argument.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    js_decipher = interpreter.extract_function(sig_func_name)

    def decipher(s):
        return js_decipher([s])

    return decipher
961
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter. The returned callable takes the signature string
    and passes it as the sole argument.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    swf_decipher = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        return swf_decipher([s])

    return decipher
968
83799698 969 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 970 """Turn the encrypted s field into a working signature"""
6b37f0be 971
c8bf86d5 972 if player_url is None:
69ea8ca4 973 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 974
69ea8ca4 975 if player_url.startswith('//'):
78caa52a 976 player_url = 'https:' + player_url
c8bf86d5 977 try:
62af3a0e 978 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
979 if player_id not in self._player_cache:
980 func = self._extract_signature_function(
60064c53 981 video_id, player_url, s
c8bf86d5
PH
982 )
983 self._player_cache[player_id] = func
984 func = self._player_cache[player_id]
985 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 986 self._print_sig_code(func, s)
c8bf86d5
PH
987 return func(s)
988 except Exception as e:
989 tb = traceback.format_exc()
990 raise ExtractorError(
78caa52a 991 'Signature extraction failed: ' + tb, cause=e)
e0df6211 992
360e1ca5 993 def _get_subtitles(self, video_id, webpage):
de7f3446 994 try:
60e47a26 995 subs_doc = self._download_xml(
38c2e5b8 996 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
997 video_id, note=False)
998 except ExtractorError as err:
9b9c5355 999 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1000 return {}
de7f3446
JMF
1001
1002 sub_lang_list = {}
60e47a26
JMF
1003 for track in subs_doc.findall('track'):
1004 lang = track.attrib['lang_code']
7e660ac1
LD
1005 if lang in sub_lang_list:
1006 continue
360e1ca5 1007 sub_formats = []
23d17e4b 1008 for ext in self._SUBTITLE_FORMATS:
360e1ca5
JMF
1009 params = compat_urllib_parse.urlencode({
1010 'lang': lang,
1011 'v': video_id,
1012 'fmt': ext,
1013 'name': track.attrib['name'].encode('utf-8'),
1014 })
1015 sub_formats.append({
1016 'url': 'https://www.youtube.com/api/timedtext?' + params,
1017 'ext': ext,
1018 })
1019 sub_lang_list[lang] = sub_formats
de7f3446 1020 if not sub_lang_list:
69ea8ca4 1021 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1022 return {}
1023 return sub_lang_list
1024
a72778d3
S
1025 def _get_ytplayer_config(self, video_id, webpage):
1026 patterns = (
526b3b07
S
1027 # User data may contain arbitrary character sequences that may affect
1028 # JSON extraction with regex, e.g. when '};' is contained the second
1029 # regex won't capture the whole JSON. Yet working around by trying more
1030 # concrete regex first keeping in mind proper quoted string handling
1031 # to be implemented in future that will replace this workaround (see
1032 # https://github.com/rg3/youtube-dl/issues/7468,
1033 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1034 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1035 r';ytplayer\.config\s*=\s*({.+?});',
1036 )
1037 config = self._search_regex(
1038 patterns, webpage, 'ytplayer.config', default=None)
1039 if config:
1040 return self._parse_json(
1041 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1042
360e1ca5 1043 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1044 """We need the webpage for getting the captions url, pass it as an
1045 argument to speed up the process."""
69ea8ca4 1046 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1047 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1048 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1049 if not player_config:
de7f3446
JMF
1050 self._downloader.report_warning(err_msg)
1051 return {}
de7f3446 1052 try:
0792d563 1053 args = player_config['args']
b78b292f
S
1054 caption_url = args.get('ttsurl')
1055 if caption_url:
1056 timestamp = args['timestamp']
1057 # We get the available subtitles
1058 list_params = compat_urllib_parse.urlencode({
1059 'type': 'list',
1060 'tlangs': 1,
1061 'asrs': 1,
1062 })
1063 list_url = caption_url + '&' + list_params
1064 caption_list = self._download_xml(list_url, video_id)
1065 original_lang_node = caption_list.find('track')
1066 if original_lang_node is None:
1067 self._downloader.report_warning('Video doesn\'t have automatic captions')
1068 return {}
1069 original_lang = original_lang_node.attrib['lang_code']
1070 caption_kind = original_lang_node.attrib.get('kind', '')
1071
1072 sub_lang_list = {}
1073 for lang_node in caption_list.findall('target'):
1074 sub_lang = lang_node.attrib['lang_code']
1075 sub_formats = []
1076 for ext in self._SUBTITLE_FORMATS:
1077 params = compat_urllib_parse.urlencode({
1078 'lang': original_lang,
1079 'tlang': sub_lang,
1080 'fmt': ext,
1081 'ts': timestamp,
1082 'kind': caption_kind,
1083 })
1084 sub_formats.append({
1085 'url': caption_url + '&' + params,
1086 'ext': ext,
1087 })
1088 sub_lang_list[sub_lang] = sub_formats
1089 return sub_lang_list
1090
1091 # Some videos don't provide ttsurl but rather caption_tracks and
1092 # caption_translation_languages (e.g. 20LmZk1hakA)
1093 caption_tracks = args['caption_tracks']
1094 caption_translation_languages = args['caption_translation_languages']
1095 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1096 parsed_caption_url = compat_urlparse.urlparse(caption_url)
1097 caption_qs = compat_parse_qs(parsed_caption_url.query)
055e6f36
JMF
1098
1099 sub_lang_list = {}
b78b292f
S
1100 for lang in caption_translation_languages.split(','):
1101 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1102 sub_lang = lang_qs.get('lc', [None])[0]
1103 if not sub_lang:
1104 continue
360e1ca5 1105 sub_formats = []
23d17e4b 1106 for ext in self._SUBTITLE_FORMATS:
b78b292f
S
1107 caption_qs.update({
1108 'tlang': [sub_lang],
1109 'fmt': [ext],
360e1ca5 1110 })
b78b292f
S
1111 sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
1112 query=compat_urllib_parse.urlencode(caption_qs, True)))
360e1ca5 1113 sub_formats.append({
b78b292f 1114 'url': sub_url,
360e1ca5
JMF
1115 'ext': ext,
1116 })
1117 sub_lang_list[sub_lang] = sub_formats
055e6f36 1118 return sub_lang_list
de7f3446
JMF
1119 # An extractor error can be raise by the download process if there are
1120 # no automatic captions but there are subtitles
1121 except (KeyError, ExtractorError):
1122 self._downloader.report_warning(err_msg)
1123 return {}
1124
d77ab8e2
S
1125 def _mark_watched(self, video_id, video_info):
1126 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1127 if not playback_url:
1128 return
1129 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1130 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1131
1132 # cpn generation algorithm is reverse engineered from base.js.
1133 # In fact it works even with dummy cpn.
1134 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1135 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1136
1137 qs.update({
1138 'ver': ['2'],
1139 'cpn': [cpn],
1140 })
1141 playback_url = compat_urlparse.urlunparse(
1142 parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
1143
1144 self._download_webpage(
1145 playback_url, video_id, 'Marking watched',
1146 'Unable to mark watched', fatal=False)
1147
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. the second group of cls._VALID_URL matched on url.

    The pattern is matched in verbose mode. Raises ExtractorError when the
    URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1155
1d043b93
JMF
1156 def _extract_from_m3u8(self, manifest_url, video_id):
1157 url_map = {}
5f6a1245 1158
1d043b93
JMF
1159 def _get_urls(_manifest):
1160 lines = _manifest.split('\n')
1161 urls = filter(lambda l: l and not l.startswith('#'),
8bcc8756 1162 lines)
1d043b93 1163 return urls
78caa52a 1164 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1d043b93
JMF
1165 formats_urls = _get_urls(manifest)
1166 for format_url in formats_urls:
890f62e8 1167 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
1168 url_map[itag] = format_url
1169 return url_map
1170
1fb07d10
JG
1171 def _extract_annotations(self, video_id):
1172 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 1173 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 1174
c5e8d7af 1175 def _real_extract(self, url):
cf7e015f
S
1176 url, smuggled_data = unsmuggle_url(url, {})
1177
7e8c0af0 1178 proto = (
78caa52a
PH
1179 'http' if self._downloader.params.get('prefer_insecure', False)
1180 else 'https')
7e8c0af0 1181
7c80519c 1182 start_time = None
297a564b 1183 end_time = None
7c80519c
JMF
1184 parsed_url = compat_urllib_parse_urlparse(url)
1185 for component in [parsed_url.fragment, parsed_url.query]:
1186 query = compat_parse_qs(component)
297a564b 1187 if start_time is None and 't' in query:
7c80519c 1188 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1189 if start_time is None and 'start' in query:
1190 start_time = parse_duration(query['start'][0])
297a564b
JMF
1191 if end_time is None and 'end' in query:
1192 end_time = parse_duration(query['end'][0])
7c80519c 1193
c5e8d7af
PH
1194 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1195 mobj = re.search(self._NEXT_URL_RE, url)
1196 if mobj:
7fd002c0 1197 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1198 video_id = self.extract_id(url)
c5e8d7af
PH
1199
1200 # Get video webpage
aa79ac0c 1201 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1202 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1203
1204 # Attempt to extract SWF player URL
e0df6211 1205 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1206 if mobj is not None:
1207 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1208 else:
1209 player_url = None
1210
d8d24a92
S
1211 dash_mpds = []
1212
1213 def add_dash_mpd(video_info):
1214 dash_mpd = video_info.get('dashmpd')
1215 if dash_mpd and dash_mpd[0] not in dash_mpds:
1216 dash_mpds.append(dash_mpd[0])
1217
c5e8d7af 1218 # Get video info
6449cd80 1219 embed_webpage = None
2fe1ff85 1220 is_live = None
c108eb73 1221 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1222 age_gate = True
1223 # We simulate the access to the video from www.youtube.com/v/{video_id}
1224 # this can be viewed without login into Youtube
beb95e77
CL
1225 url = proto + '://www.youtube.com/embed/%s' % video_id
1226 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
2c57c7fa
JMF
1227 data = compat_urllib_parse.urlencode({
1228 'video_id': video_id,
1229 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1230 'sts': self._search_regex(
beb95e77 1231 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1232 })
7e8c0af0 1233 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1234 video_info_webpage = self._download_webpage(
1235 video_info_url, video_id,
20436c30 1236 note='Refetching age-gated info webpage',
94bd3613 1237 errnote='unable to download video info webpage')
c5e8d7af 1238 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1239 add_dash_mpd(video_info)
c108eb73
JMF
1240 else:
1241 age_gate = False
bc93bdb5 1242 video_info = None
d8d24a92 1243 # Try looking directly into the video webpage
a72778d3
S
1244 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1245 if ytplayer_config:
4e62ebe2 1246 args = ytplayer_config['args']
d8d24a92
S
1247 if args.get('url_encoded_fmt_stream_map'):
1248 # Convert to the same format returned by compat_parse_qs
1249 video_info = dict((k, [v]) for k, v in args.items())
1250 add_dash_mpd(video_info)
2fe1ff85
JMF
1251 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1252 is_live = True
0a3cf9ad
S
1253 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1254 # We also try looking in get_video_info since it may contain different dashmpd
1255 # URL that points to a DASH manifest with possibly different itag set (some itags
1256 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1257 # manifest pointed by get_video_info's dashmpd).
1258 # The general idea is to take a union of itags of both DASH manifests (for example
1259 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1260 self.report_video_info_webpage_download(video_id)
0a3cf9ad 1261 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
810fb84d
PH
1262 video_info_url = (
1263 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1264 % (proto, video_id, el_type))
1265 video_info_webpage = self._download_webpage(
1266 video_info_url,
4e62ebe2
JMF
1267 video_id, note=False,
1268 errnote='unable to download video info webpage')
0a3cf9ad 1269 get_video_info = compat_parse_qs(video_info_webpage)
87dc4511
JMF
1270 if get_video_info.get('use_cipher_signature') != ['True']:
1271 add_dash_mpd(get_video_info)
0a3cf9ad
S
1272 if not video_info:
1273 video_info = get_video_info
1274 if 'token' in get_video_info:
89ea063e
S
1275 # Different get_video_info requests may report different results, e.g.
1276 # some may report video unavailability, but some may serve it without
1277 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1278 # the original webpage as well as el=info and el=embedded get_video_info
1279 # requests report video unavailability due to geo restriction while
1280 # el=detailpage succeeds and returns valid data). This is probably
1281 # due to YouTube measures against IP ranges of hosting providers.
1282 # Working around by preferring the first succeeded video_info containing
1283 # the token if no such video_info yet was found.
44b2264f
S
1284 if 'token' not in video_info:
1285 video_info = get_video_info
4e62ebe2 1286 break
c5e8d7af
PH
1287 if 'token' not in video_info:
1288 if 'reason' in video_info:
af214c3a
YCH
1289 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1290 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
678e436f 1291 if regions_allowed:
af214c3a
YCH
1292 raise ExtractorError('YouTube said: This video is available in %s only' % (
1293 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1294 expected=True)
d11271dd 1295 raise ExtractorError(
78caa52a 1296 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1297 expected=True, video_id=video_id)
c5e8d7af 1298 else:
d11271dd 1299 raise ExtractorError(
78caa52a 1300 '"token" parameter not in video info for unknown reason',
d11271dd 1301 video_id=video_id)
c5e8d7af 1302
cf7e015f
S
1303 # title
1304 if 'title' in video_info:
1305 video_title = video_info['title'][0]
1306 else:
1307 self._downloader.report_warning('Unable to extract video title')
1308 video_title = '_'
1309
1310 # description
1311 video_description = get_element_by_id("eow-description", video_webpage)
1312 if video_description:
1313 video_description = re.sub(r'''(?x)
1314 <a\s+
1315 (?:[a-zA-Z-]+="[^"]+"\s+)*?
23f13e97 1316 (?:title|href)="([^"]+)"\s+
cf7e015f 1317 (?:[a-zA-Z-]+="[^"]+"\s+)*?
096b5339 1318 class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
23f13e97 1319 [^<]+\.{3}\s*
cf7e015f
S
1320 </a>
1321 ''', r'\1', video_description)
1322 video_description = clean_html(video_description)
1323 else:
1324 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1325 if fd_mobj:
1326 video_description = unescapeHTML(fd_mobj.group(1))
1327 else:
1328 video_description = ''
1329
5e1eddb9
S
1330 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1331 if not self._downloader.params.get('noplaylist'):
1332 entries = []
1333 feed_ids = []
6863631c 1334 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1335 for feed in multifeed_metadata_list.split(','):
6863631c
S
1336 # Unquote should take place before split on comma (,) since textual
1337 # fields may contain comma as well (see
1338 # https://github.com/rg3/youtube-dl/issues/8536)
1339 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1340 entries.append({
1341 '_type': 'url_transparent',
1342 'ie_key': 'Youtube',
1343 'url': smuggle_url(
1344 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1345 {'force_singlefeed': True}),
1346 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1347 })
1348 feed_ids.append(feed_data['id'][0])
1349 self.to_screen(
1350 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1351 % (', '.join(feed_ids), video_id))
1352 return self.playlist_result(entries, video_id, video_title, video_description)
1353 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1354
1d699755
PH
1355 if 'view_count' in video_info:
1356 view_count = int(video_info['view_count'][0])
1357 else:
1358 view_count = None
1359
c5e8d7af
PH
1360 # Check for "rental" videos
1361 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
69ea8ca4 1362 raise ExtractorError('"rental" videos not supported')
c5e8d7af
PH
1363
1364 # Start extracting information
1365 self.report_information_extraction(video_id)
1366
1367 # uploader
1368 if 'author' not in video_info:
69ea8ca4 1369 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1370 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1371
1372 # uploader_id
1373 video_uploader_id = None
fd050249
S
1374 video_uploader_url = None
1375 mobj = re.search(
1376 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1377 video_webpage)
c5e8d7af 1378 if mobj is not None:
fd050249
S
1379 video_uploader_id = mobj.group('uploader_id')
1380 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1381 else:
69ea8ca4 1382 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1383
c5e8d7af 1384 # thumbnail image
7763b04e
JMF
1385 # We try first to get a high quality image:
1386 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1387 video_webpage, re.DOTALL)
1388 if m_thumb is not None:
1389 video_thumbnail = m_thumb.group(1)
1390 elif 'thumbnail_url' not in video_info:
69ea8ca4 1391 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1392 video_thumbnail = None
c5e8d7af 1393 else: # don't panic if we can't find it
7fd002c0 1394 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1395
1396 # upload date
9d0b581f
S
1397 upload_date = self._html_search_meta(
1398 'datePublished', video_webpage, 'upload date', default=None)
1399 if not upload_date:
1400 upload_date = self._search_regex(
1401 [r'(?s)id="eow-date.*?>(.*?)</span>',
1402 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1403 video_webpage, 'upload date', default=None)
1404 if upload_date:
1405 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1406 upload_date = unified_strdate(upload_date)
c5e8d7af 1407
7caf9830
S
1408 video_license = self._html_search_regex(
1409 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1410 video_webpage, 'license', default=None)
1411
0cb58b02
S
1412 m_music = re.search(
1413 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1414 video_webpage)
1415 if m_music:
1416 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1417 video_creator = clean_html(m_music.group('creator'))
1418 else:
1419 video_alt_title = video_creator = None
1420
55f7bd2d
PH
1421 m_cat_container = self._search_regex(
1422 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1423 video_webpage, 'categories', default=None)
ec8deefc 1424 if m_cat_container:
ad3bc6ac 1425 category = self._html_search_regex(
01ed5c9b 1426 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1427 default=None)
1428 video_categories = None if category is None else [category]
1429 else:
1430 video_categories = None
ec8deefc 1431
000b6b5a
S
1432 video_tags = [
1433 unescapeHTML(m.group('content'))
1434 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1435
f30a38be 1436 def _extract_count(count_name):
c93d53f5
S
1437 return str_to_int(self._search_regex(
1438 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1439 % re.escape(count_name),
1440 video_webpage, count_name, default=None))
1441
69ea8ca4
PH
1442 like_count = _extract_count('like')
1443 dislike_count = _extract_count('dislike')
336c3a69 1444
c5e8d7af 1445 # subtitles
d82134c3 1446 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1447 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af
PH
1448
1449 if 'length_seconds' not in video_info:
69ea8ca4 1450 self._downloader.report_warning('unable to extract video duration')
b466b702 1451 video_duration = None
c5e8d7af 1452 else:
7fd002c0 1453 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 1454
1fb07d10
JG
1455 # annotations
1456 video_annotations = None
1457 if self._downloader.params.get('writeannotations', False):
5f6a1245 1458 video_annotations = self._extract_annotations(video_id)
1fb07d10 1459
dd27fd17
PH
1460 def _map_to_format_list(urlmap):
1461 formats = []
1462 for itag, video_real_url in urlmap.items():
1463 dct = {
1464 'format_id': itag,
1465 'url': video_real_url,
1466 'player_url': player_url,
1467 }
0b65e5d4
PH
1468 if itag in self._formats:
1469 dct.update(self._formats[itag])
dd27fd17
PH
1470 formats.append(dct)
1471 return formats
1472
c5e8d7af
PH
1473 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1474 self.report_rtmp_download()
dd27fd17
PH
1475 formats = [{
1476 'format_id': '_rtmp',
1477 'protocol': 'rtmp',
1478 'url': video_info['conn'][0],
1479 'player_url': player_url,
1480 }]
24270b03 1481 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1482 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1483 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1484 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1485 formats_spec = {}
82156fdb 1486 fmt_list = video_info.get('fmt_list', [''])[0]
1487 if fmt_list:
1488 for fmt in fmt_list.split(','):
1489 spec = fmt.split('/')
3318832e 1490 if len(spec) > 1:
1491 width_height = spec[1].split('x')
1492 if len(width_height) == 2:
1493 formats_spec[spec[0]] = {
1494 'resolution': spec[1],
1495 'width': int_or_none(width_height[0]),
1496 'height': int_or_none(width_height[1]),
1497 }
c9afb51c 1498 formats = []
00fe14fc 1499 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1500 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1501 if 'itag' not in url_data or 'url' not in url_data:
1502 continue
1503 format_id = url_data['itag'][0]
1504 url = url_data['url'][0]
1505
1506 if 'sig' in url_data:
1507 url += '&signature=' + url_data['sig'][0]
1508 elif 's' in url_data:
1509 encrypted_sig = url_data['s'][0]
6449cd80 1510 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
201e9eaa 1511
beb95e77 1512 jsplayer_url_json = self._search_regex(
6449cd80
PH
1513 ASSETS_RE,
1514 embed_webpage if age_gate else video_webpage,
1515 'JS player URL (1)', default=None)
1516 if not jsplayer_url_json and not age_gate:
1517 # We need the embed website after all
1518 if embed_webpage is None:
1519 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1520 embed_webpage = self._download_webpage(
1521 embed_url, video_id, 'Downloading embed webpage')
1522 jsplayer_url_json = self._search_regex(
1523 ASSETS_RE, embed_webpage, 'JS player URL')
1524
beb95e77 1525 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1526 if player_url is None:
1527 player_url_json = self._search_regex(
1528 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1529 video_webpage, 'age gate player URL')
201e9eaa
PH
1530 player_url = json.loads(player_url_json)
1531
1532 if self._downloader.params.get('verbose'):
cf010131 1533 if player_url is None:
201e9eaa
PH
1534 player_version = 'unknown'
1535 player_desc = 'unknown'
1536 else:
1537 if player_url.endswith('swf'):
1538 player_version = self._search_regex(
1539 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1540 'flash player', fatal=False)
201e9eaa 1541 player_desc = 'flash player %s' % player_version
cf010131 1542 else:
201e9eaa 1543 player_version = self._search_regex(
50f84a9a 1544 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
201e9eaa
PH
1545 player_url,
1546 'html5 player', fatal=False)
78caa52a 1547 player_desc = 'html5 player %s' % player_version
201e9eaa 1548
60064c53 1549 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1550 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1551 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1552
1553 signature = self._decrypt_signature(
1554 encrypted_sig, video_id, player_url, age_gate)
1555 url += '&signature=' + signature
1556 if 'ratebypass' not in url:
1557 url += '&ratebypass=yes'
c9afb51c 1558
94278f72
YCH
1559 dct = {
1560 'format_id': format_id,
1561 'url': url,
1562 'player_url': player_url,
1563 }
1564 if format_id in self._formats:
1565 dct.update(self._formats[format_id])
3318832e 1566 if format_id in formats_spec:
1567 dct.update(formats_spec[format_id])
94278f72 1568
aabc2be6
S
1569 # Some itags are not included in DASH manifest thus corresponding formats will
1570 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1571 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1572 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1573 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1574
1575 more_fields = {
c9afb51c 1576 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1577 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1578 'width': width,
1579 'height': height,
1580 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1581 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1582 }
94278f72
YCH
1583 for key, value in more_fields.items():
1584 if value:
1585 dct[key] = value
aabc2be6
S
1586 type_ = url_data.get('type', [None])[0]
1587 if type_:
1588 type_split = type_.split(';')
1589 kind_ext = type_split[0].split('/')
1590 if len(kind_ext) == 2:
94278f72
YCH
1591 kind, _ = kind_ext
1592 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1593 if kind in ('audio', 'video'):
1594 codecs = None
1595 for mobj in re.finditer(
1596 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1597 if mobj.group('key') == 'codecs':
1598 codecs = mobj.group('val')
1599 break
1600 if codecs:
1601 codecs = codecs.split(',')
1602 if len(codecs) == 2:
cc28492d 1603 acodec, vcodec = codecs[1], codecs[0]
aabc2be6
S
1604 else:
1605 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1606 dct.update({
1607 'acodec': acodec,
1608 'vcodec': vcodec,
1609 })
aabc2be6 1610 formats.append(dct)
1d043b93
JMF
1611 elif video_info.get('hlsvp'):
1612 manifest_url = video_info['hlsvp'][0]
1613 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 1614 formats = _map_to_format_list(url_map)
ac5a69af
YCH
1615 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1616 for a_format in formats:
049d71d8 1617 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
c5e8d7af 1618 else:
8ceabd4d
S
1619 unavailable_message = self._html_search_regex(
1620 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1621 video_webpage, 'unavailable message', default=None)
1622 if unavailable_message:
1623 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1624 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1625
dd27fd17 1626 # Look for the DASH manifest
203fb43f 1627 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1628 dash_mpd_fatal = True
8ff648e4 1629 for mpd_url in dash_mpds:
d8d24a92 1630 dash_formats = {}
774e208f 1631 try:
05d0d131
YCH
1632 def decrypt_sig(mobj):
1633 s = mobj.group(1)
1634 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1635 return '/signature/%s' % dec_s
1636
8ff648e4 1637 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1638
8ff648e4 1639 for df in self._extract_mpd_formats(
1640 mpd_url, video_id, fatal=dash_mpd_fatal,
1641 formats_dict=self._formats):
d8d24a92
S
1642 # Do not overwrite DASH format found in some previous DASH manifest
1643 if df['format_id'] not in dash_formats:
1644 dash_formats[df['format_id']] = df
77c6fb5b
S
1645 # Additional DASH manifests may end up in HTTP Error 403 therefore
1646 # allow them to fail without bug report message if we already have
1647 # some DASH manifest succeeded. This is temporary workaround to reduce
1648 # burst of bug reports until we figure out the reason and whether it
1649 # can be fixed at all.
1650 dash_mpd_fatal = False
774e208f
PH
1651 except (ExtractorError, KeyError) as e:
1652 self.report_warning(
1653 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1654 if dash_formats:
04b3b3df
JMF
1655 # Remove the formats we found through non-DASH, they
1656 # contain less info and it can be wrong, because we use
1657 # fixed values (for example the resolution). See
1658 # https://github.com/rg3/youtube-dl/issues/5774 for an
1659 # example.
d80265cc 1660 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1661 formats.extend(dash_formats.values())
d80044c2 1662
6271f1ca
PH
1663 # Check for malformed aspect ratio
1664 stretched_m = re.search(
1665 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1666 video_webpage)
1667 if stretched_m:
313dfc45
LL
1668 w = float(stretched_m.group('w'))
1669 h = float(stretched_m.group('h'))
5faf9fed
S
1670 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1671 # We will only process correct ratios.
313dfc45 1672 if w > 0 and h > 0:
41f24c32 1673 ratio = w / h
313dfc45
LL
1674 for f in formats:
1675 if f.get('vcodec') != 'none':
1676 f['stretched_ratio'] = ratio
6271f1ca 1677
4bcc7bd1 1678 self._sort_formats(formats)
4ea3be0a 1679
d77ab8e2
S
1680 self.mark_watched(video_id, video_info)
1681
4ea3be0a 1682 return {
8bcc8756
JW
1683 'id': video_id,
1684 'uploader': video_uploader,
1685 'uploader_id': video_uploader_id,
fd050249 1686 'uploader_url': video_uploader_url,
8bcc8756 1687 'upload_date': upload_date,
7caf9830 1688 'license': video_license,
0cb58b02 1689 'creator': video_creator,
8bcc8756 1690 'title': video_title,
0cb58b02 1691 'alt_title': video_alt_title,
8bcc8756
JW
1692 'thumbnail': video_thumbnail,
1693 'description': video_description,
1694 'categories': video_categories,
000b6b5a 1695 'tags': video_tags,
8bcc8756 1696 'subtitles': video_subtitles,
360e1ca5 1697 'automatic_captions': automatic_captions,
8bcc8756
JW
1698 'duration': video_duration,
1699 'age_limit': 18 if age_gate else 0,
1700 'annotations': video_annotations,
7e8c0af0 1701 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1702 'view_count': view_count,
4ea3be0a 1703 'like_count': like_count,
1704 'dislike_count': dislike_count,
2d30521a 1705 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1706 'formats': formats,
2fe1ff85 1707 'is_live': is_live,
7c80519c 1708 'start_time': start_time,
297a564b 1709 'end_time': end_time,
4ea3be0a 1710 }
c5e8d7af 1711
5f6a1245 1712
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, including mixes (``RD``/``UL`` ids)."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?[&;])*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Was misspelled as 'playlist_mincout', so the minimum was never checked.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix.

        The watch page of the seed video is downloaded and the video ids
        linked together with this playlist id are collected from it.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')

        def search_title(class_name):
            return get_element_by_attribute('class', class_name, webpage)

        # Try the known title containers in order; the first non-empty one wins.
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return a playlist result.

        Raises ExtractorError (expected) when the page carries an alert saying
        the playlist does not exist, is private, or the parameters are invalid.
        Other alerts are reported as warnings.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Return a single-video result if only the video should be fetched.

        When the URL has a ``v`` query parameter and the ``noplaylist`` option
        is set, a url_result for that video is returned. Otherwise the method
        returns None (after announcing the playlist download when ``v`` is
        present).
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    def _real_extract(self, url):
        """Dispatch to single-video, mix, or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1884
c5e8d7af 1885
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        # URLs claimed by the playlists extractor are never handled here.
        if YoutubePlaylistsIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items,
        # i.e. 1050 videos total (see #5778). Work around this by extracting
        # as a playlist when the channel playlist id can be obtained; fall
        # back on page-by-page channel extraction otherwise.
        probe_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        external_id = False
        if probe_page is not False:
            external_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None
            ) or self._search_regex(
                r'data-(?:channel-external-|yt)id="([^"]+)"',
                probe_page, 'channel id', default=None)
        if external_id and external_id.startswith('UC'):
            uploads_id = 'UU' + external_id[2:]
            uploads_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_marker is None:
            return self.playlist_result(
                self._entries(first_page, channel_id), channel_id)

        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        single_page_entries = []
        for video_id, video_title in self.extract_videos_from_page(first_page):
            single_page_entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(single_page_entries, channel_id)
c5e8d7af
PH
1958
1959
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>`` URLs and the ``ytuser:`` prefix."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # This regex is very permissive, so defer to any other extractor
        # defined in this module that also accepts the URL.
        for name, candidate in globals().items():
            if not name.endswith('IE') or candidate is cls:
                continue
            if candidate.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
f4b05232 1986
b05654f0 1987
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel."""

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
0c148415
S
2016
2017
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for ``ytsearch`` queries; results are paged from /results."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every results request.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields no
        videos or at least ``n`` videos have been collected; the list is then
        cut down to ``n`` entries.

        Raises ExtractorError (expected) when a results page contains a
        ``search-message`` element.
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are in hand: '>=' (rather than
            # '>') avoids downloading one extra page when exactly n videos
            # have been collected.
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2061
c9ae7b95 2062
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant that adds a sort-by-upload-date query argument."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
75dff0ee 2068
c9ae7b95
PH
2069
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=``) page URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The decoded search terms double as the playlist title.
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        results_html = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        # Each result heading yields one entry: a title (optional) and a
        # link (mandatory), resolved against the site root.
        entries = []
        for heading_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>',
                results_html):
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'],
                heading_html, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', item_href),
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
2114
2115
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs, handled via the show's playlists page."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # Hand the show's "/playlists" page to the base-class extraction.
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2133
2134
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and its "load more" portions.

        Paging stops when a portion contributes no new video ids or when no
        further "load more" link is present.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        seen_ids = set()  # same content as ``ids``; used for fast membership tests
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 ``filter()`` returns a lazy
            # iterator that is always truthy, which made the emptiness check
            # below never fire.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            ids.extend(new_ids)
            seen_ids.update(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2182
2183
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist id ``WL``)."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Prefer the single-video result when one is returned; otherwise
        # extract the whole 'WL' playlist.
        return (
            self._check_download_just_video(url, 'WL') or
            self._extract_playlist('WL'))
f459d170 2202
5f6a1245 2203
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        # The favourites page links to a playlist; pull the id from the first
        # "list=" parameter and delegate to the playlist extractor.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
2214
2215
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed (``:ytrec`` shortcut).

    Only declares configuration; all extraction logic is inherited.
    """

    # Feed identifier and playlist title read by the inherited feed logic.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2221
1ed5b5c9 2222
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed (``:ytsubs`` shortcut).

    Only declares configuration; all extraction logic is inherited.
    """

    # The shortcut is quoted with its leading colon because _VALID_URL below
    # only accepts ':ytsubs' / ':ytsubscriptions'; the sibling feed
    # extractors word their descriptions the same way.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 2228
1ed5b5c9 2229
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch history feed (``:ythistory`` shortcut).

    Only declares configuration; all extraction logic is inherited.
    """

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' is a regex escape, not a string escape. Without the
    # r-prefix newer Python versions warn about an invalid escape sequence;
    # the resulting pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1ed5b5c9
JMF
2235
2236
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Such URLs are typically what is left when an unquoted ``&`` makes the
    shell cut the URL short. Extraction always fails with an expected
    error that tells the user to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list

    # Verbose pattern: whitespace inside it is ignored by the regex engine.
    # The host is matched case-insensitively by hand; the query part is a
    # single known parameter (or nothing) followed by end of string.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2284
2285
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with an expected error reporting the
    incomplete id and the URL it came from.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)