]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Fix authentication (#10392)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
c5e8d7af 19 compat_parse_qs,
7fd002c0
S
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
c5e8d7af 25 compat_str,
4bb4a188
PH
26)
27from ..utils import (
c5e8d7af 28 clean_html,
9b9c5355 29 error_to_compat_str,
c5e8d7af 30 ExtractorError,
2d30521a 31 float_or_none,
4bb4a188
PH
32 get_element_by_attribute,
33 get_element_by_id,
dd27fd17 34 int_or_none,
94278f72 35 mimetype2ext,
4bb4a188 36 orderedSet,
7c80519c 37 parse_duration,
0cb58b02 38 remove_quotes,
041bc3ad 39 remove_start,
5c2266df 40 sanitized_Request,
cf7e015f 41 smuggle_url,
c93d53f5 42 str_to_int,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
81c2f20b 46 uppercase_escape,
6e6bc8da 47 urlencode_postdata,
af214c3a 48 ISO3166Utils,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the PREF cookie (containing hl=en) on .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one url_result (targeting the 'Youtube' extractor) per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An ExtractorError is also raised when the login response carries a
        password error element (id="errormsg_0_Passwd").
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # A failed download is a failed login: report False as documented
            # instead of falling through with an implicit None.
            return False

        # Start from the hidden inputs of the login page and add the credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'Email': username,
            'Passwd': password,
        })

        login_results = self._download_webpage(
            self._PASSWORD_CHALLENGE_URL, None,
            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form))
        if login_results is False:
            return False

        # Prefer the error text supplied by the page, when there is any
        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text could be extracted
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Being served the challenge form again means the code was rejected
            if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Still seeing the login form after submitting the credentials
        if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in, unless no downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 169
8377574c 170
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of `page` and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched as JSON from the URL found in the load-more widget.
        Iteration ends when no load-more link is found or when the fetched
        content is blank.
        """
        current_html = page
        widget_html = page
        page_index = 0
        while True:
            page_index += 1
            for item in self._process_page(current_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if not load_more:
                return

            response = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                'Downloading page #%s' % page_index,
                transform_source=uppercase_escape)
            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
193
061a75ed
S
194
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in `content`."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page(self, page):
        """Collect unique video ids matched by self._VIDEO_RE, paired with titles.

        Ids keep their order of first appearance. When an id occurs more than
        once, the first non-empty title seen for it is kept. Returns
        zip(ids, titles).
        """
        found_ids = []
        found_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not the 'index' group,
            # against '0' - looks unintended; kept as-is, confirm before changing.
            has_index_group = 'index' in match.groupdict()
            if has_index_group and match.group('id') == '0':
                continue
            vid = match.group('id')
            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if vid not in found_ids:
                # First occurrence: record it even when the title is empty
                found_ids.append(vid)
                found_titles.append(title)
                continue
            # Repeated id: only fill in a title that is still missing
            pos = found_ids.index(vid)
            if title and not found_titles[pos]:
                found_titles[pos] = title
        return zip(found_ids, found_titles)
219
220
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in `content`."""
        playlist_link_re = r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"'
        # orderedSet drops repeated ids
        unique_ids = orderedSet(re.findall(playlist_link_re, content))
        for list_id in unique_ids:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download `url` and return a playlist result built from its paginated entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # The title is optional: a missing og:title must not abort extraction
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
234
235
360e1ca5 236class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 237 IE_DESC = 'YouTube.com'
cb7dfeea 238 _VALID_URL = r"""(?x)^
c5e8d7af 239 (
edb53e2d 240 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 241 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 242 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 243 (?:www\.)?pwnyoutube\.com/|
f7000f3a 244 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
245 tube\.majestyc\.net/|
246 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
247 (?:.*?\#/)? # handle anchor (#/) redirect urls
248 (?: # the various things that can precede the ID:
ac7553d0 249 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 250 |(?: # or the v= param in all its forms
f7000f3a 251 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 252 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 253 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
254 v=
255 )
f4b05232 256 ))
cbaed4bb
S
257 |(?:
258 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
259 vid\.plus| # or vid.plus/xxxx
260 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 261 )/
edb53e2d 262 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 263 )
c5e8d7af 264 )? # all until now is optional -> you can pass the naked ID
8963d9c2 265 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
9291475f 266 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
267 (?(1).+)? # if we found the ID, everything can follow
268 $"""
c5e8d7af 269 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 270 _formats = {
c2d3cb4c 271 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
272 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
273 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
274 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
275 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
276 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
277 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
278 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 279 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 280 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
281 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
282 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
283 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
284 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
285 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 286 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 287 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
288 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 289
290
291 # 3D videos
c2d3cb4c 292 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
293 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
294 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
295 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 296 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
297 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
298 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 299
96fb5605 300 # Apple HTTP Live Streaming
11f12195 301 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 302 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
303 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
304 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
305 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
306 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 307 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
308 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
309
310 # DASH mp4 video
c2d3cb4c 311 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
312 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
313 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
314 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
315 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
316 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
317 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
a6c2c244
YCH
318 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
319 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
320 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
321 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
836a086c 322
f6f1fc92 323 # Dash mp4 audio
c2d3cb4c 324 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
325 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
326 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
2c347352
S
327 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
328 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
329
330 # Dash webm
a6c2c244
YCH
331 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
332 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
333 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
334 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
335 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
336 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
337 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
338 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
339 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
340 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
341 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
342 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
343 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
344 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
345 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
4c6b4764 346 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
a6c2c244
YCH
347 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
348 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
349 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
350 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
351 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
352 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
2c62dc26
PH
353
354 # Dash webm audio
a6c2c244
YCH
355 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
356 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 357
0857baad 358 # Dash webm audio with opus inside
a6c2c244
YCH
359 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
360 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
361 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
0857baad 362
ce6b9a2d
PH
363 # RTMP (unnamed)
364 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 365 }
23d17e4b 366 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 367
78caa52a 368 IE_NAME = 'youtube'
2eb88d95
PH
369 _TESTS = [
370 {
b67d6314 371 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
372 'info_dict': {
373 'id': 'BaW_jenozKc',
374 'ext': 'mp4',
375 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
376 'uploader': 'Philipp Hagemeister',
377 'uploader_id': 'phihag',
fd050249 378 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 379 'upload_date': '20121002',
7caf9830 380 'license': 'Standard YouTube License',
4bc3a23e
PH
381 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
382 'categories': ['Science & Technology'],
000b6b5a 383 'tags': ['youtube-dl'],
3e7c1224
PH
384 'like_count': int,
385 'dislike_count': int,
7c80519c 386 'start_time': 1,
297a564b 387 'end_time': 9,
2eb88d95 388 }
0e853ca4 389 },
0e853ca4 390 {
4bc3a23e
PH
391 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
392 'note': 'Test generic use_cipher_signature video (#897)',
393 'info_dict': {
394 'id': 'UxxajLWwzqY',
395 'ext': 'mp4',
396 'upload_date': '20120506',
397 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 398 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 399 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
400 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
401 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
402 'iconic ep', 'iconic', 'love', 'it'],
4bc3a23e
PH
403 'uploader': 'Icona Pop',
404 'uploader_id': 'IconaPop',
fd050249 405 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 406 'license': 'Standard YouTube License',
0cb58b02 407 'creator': 'Icona Pop',
2eb88d95 408 }
c108eb73
JMF
409 },
410 {
4bc3a23e
PH
411 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
412 'note': 'Test VEVO video with age protection (#956)',
413 'info_dict': {
414 'id': '07FYdnEawAQ',
415 'ext': 'mp4',
416 'upload_date': '20130703',
417 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 418 'alt_title': 'Tunnel Vision',
4bc3a23e
PH
419 'description': 'md5:64249768eec3bc4276236606ea996373',
420 'uploader': 'justintimberlakeVEVO',
421 'uploader_id': 'justintimberlakeVEVO',
fd050249 422 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 423 'license': 'Standard YouTube License',
0cb58b02 424 'creator': 'Justin Timberlake',
34952f09 425 'age_limit': 18,
c108eb73
JMF
426 }
427 },
fccd3771 428 {
4bc3a23e
PH
429 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
430 'note': 'Embed-only video (#1746)',
431 'info_dict': {
432 'id': 'yZIXLfi8CZQ',
433 'ext': 'mp4',
434 'upload_date': '20120608',
435 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
436 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
437 'uploader': 'SET India',
94bfcd23 438 'uploader_id': 'setindia',
fd050249 439 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 440 'license': 'Standard YouTube License',
94bfcd23 441 'age_limit': 18,
fccd3771
PH
442 }
443 },
11b56058 444 {
b67d6314 445 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
446 'note': 'Use the first video ID in the URL',
447 'info_dict': {
448 'id': 'BaW_jenozKc',
449 'ext': 'mp4',
450 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
451 'uploader': 'Philipp Hagemeister',
452 'uploader_id': 'phihag',
fd050249 453 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 454 'upload_date': '20121002',
7caf9830 455 'license': 'Standard YouTube License',
11b56058
PM
456 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
457 'categories': ['Science & Technology'],
458 'tags': ['youtube-dl'],
459 'like_count': int,
460 'dislike_count': int,
34a7de29
S
461 },
462 'params': {
463 'skip_download': True,
464 },
11b56058 465 },
dd27fd17 466 {
4bc3a23e
PH
467 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
468 'note': '256k DASH audio (format 141) via DASH manifest',
469 'info_dict': {
470 'id': 'a9LDPn-MO4I',
471 'ext': 'm4a',
472 'upload_date': '20121002',
473 'uploader_id': '8KVIDEO',
fd050249 474 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
475 'description': '',
476 'uploader': '8KVIDEO',
7caf9830 477 'license': 'Standard YouTube License',
4bc3a23e 478 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 479 },
4bc3a23e
PH
480 'params': {
481 'youtube_include_dash_manifest': True,
482 'format': '141',
4919603f 483 },
de3c7fe0 484 'skip': 'format 141 not served anymore',
dd27fd17 485 },
3489b7d2
JMF
486 # DASH manifest with encrypted signature
487 {
78caa52a
PH
488 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
489 'info_dict': {
490 'id': 'IB3lcPjvWLA',
491 'ext': 'm4a',
b766eb27
JMF
492 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
493 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
78caa52a
PH
494 'uploader': 'AfrojackVEVO',
495 'uploader_id': 'AfrojackVEVO',
496 'upload_date': '20131011',
7caf9830 497 'license': 'Standard YouTube License',
3489b7d2 498 },
4bc3a23e 499 'params': {
78caa52a 500 'youtube_include_dash_manifest': True,
de3c7fe0 501 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
502 },
503 },
aaeb86f6
S
504 # JS player signature function name containing $
505 {
506 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
507 'info_dict': {
508 'id': 'nfWlot6h_JM',
509 'ext': 'm4a',
510 'title': 'Taylor Swift - Shake It Off',
0cb58b02 511 'alt_title': 'Shake It Off',
f57b7835 512 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
aaeb86f6
S
513 'uploader': 'TaylorSwiftVEVO',
514 'uploader_id': 'TaylorSwiftVEVO',
515 'upload_date': '20140818',
7caf9830 516 'license': 'Standard YouTube License',
0cb58b02 517 'creator': 'Taylor Swift',
aaeb86f6
S
518 },
519 'params': {
520 'youtube_include_dash_manifest': True,
de3c7fe0 521 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
522 },
523 },
aa79ac0c
PH
524 # Controversy video
525 {
526 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
527 'info_dict': {
528 'id': 'T4XJQO3qol8',
529 'ext': 'mp4',
530 'upload_date': '20100909',
531 'uploader': 'The Amazing Atheist',
532 'uploader_id': 'TheAmazingAtheist',
fd050249 533 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 534 'license': 'Standard YouTube License',
aa79ac0c
PH
535 'title': 'Burning Everyone\'s Koran',
536 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
537 }
c522adb1
JMF
538 },
539 # Normal age-gate video (No vevo, embed allowed)
540 {
541 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
542 'info_dict': {
543 'id': 'HtVdAasjOgU',
544 'ext': 'mp4',
545 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
9ed99402 546 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
c522adb1
JMF
547 'uploader': 'The Witcher',
548 'uploader_id': 'WitcherGame',
fd050249 549 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 550 'upload_date': '20140605',
7caf9830 551 'license': 'Standard YouTube License',
34952f09 552 'age_limit': 18,
c522adb1
JMF
553 },
554 },
fccae2b9
S
555 # Age-gate video with encrypted signature
556 {
557 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
558 'info_dict': {
559 'id': '6kLq3WMV1nU',
560 'ext': 'mp4',
561 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
562 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
563 'uploader': 'LloydVEVO',
564 'uploader_id': 'LloydVEVO',
fd050249 565 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 566 'upload_date': '20110629',
7caf9830 567 'license': 'Standard YouTube License',
34952f09 568 'age_limit': 18,
fccae2b9
S
569 },
570 },
774e208f
PH
571 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
572 {
573 'url': '__2ABJjxzNo',
574 'info_dict': {
575 'id': '__2ABJjxzNo',
576 'ext': 'mp4',
577 'upload_date': '20100430',
578 'uploader_id': 'deadmau5',
fd050249 579 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 580 'creator': 'deadmau5',
774e208f
PH
581 'description': 'md5:12c56784b8032162bb936a5f76d55360',
582 'uploader': 'deadmau5',
7caf9830 583 'license': 'Standard YouTube License',
774e208f 584 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 585 'alt_title': 'Some Chords',
774e208f
PH
586 },
587 'expected_warnings': [
588 'DASH manifest missing',
589 ]
e52a40ab
PH
590 },
591 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
592 {
593 'url': 'lqQg6PlCWgI',
594 'info_dict': {
595 'id': 'lqQg6PlCWgI',
596 'ext': 'mp4',
90227264 597 'upload_date': '20150827',
cbe2bd91 598 'uploader_id': 'olympic',
fd050249 599 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 600 'license': 'Standard YouTube License',
cbe2bd91 601 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 602 'uploader': 'Olympic',
cbe2bd91
PH
603 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
604 },
605 'params': {
606 'skip_download': 'requires avconv',
e52a40ab 607 }
cbe2bd91 608 },
6271f1ca
PH
609 # Non-square pixels
610 {
611 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
612 'info_dict': {
613 'id': '_b-2C3KPAM0',
614 'ext': 'mp4',
615 'stretched_ratio': 16 / 9.,
616 'upload_date': '20110310',
617 'uploader_id': 'AllenMeow',
fd050249 618 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
619 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
620 'uploader': '孫艾倫',
7caf9830 621 'license': 'Standard YouTube License',
6271f1ca
PH
622 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
623 },
06b491eb
S
624 },
625 # url_encoded_fmt_stream_map is empty string
626 {
627 'url': 'qEJwOuvDf7I',
628 'info_dict': {
629 'id': 'qEJwOuvDf7I',
f57b7835 630 'ext': 'webm',
06b491eb
S
631 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
632 'description': '',
633 'upload_date': '20150404',
634 'uploader_id': 'spbelect',
635 'uploader': 'Наблюдатели Петербурга',
636 },
637 'params': {
638 'skip_download': 'requires avconv',
e323cf3f
S
639 },
640 'skip': 'This live event has ended.',
06b491eb 641 },
da77d856
S
642 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
643 {
644 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
645 'info_dict': {
646 'id': 'FIl7x6_3R5Y',
647 'ext': 'mp4',
648 'title': 'md5:7b81415841e02ecd4313668cde88737a',
649 'description': 'md5:116377fd2963b81ec4ce64b542173306',
650 'upload_date': '20150625',
651 'uploader_id': 'dorappi2000',
fd050249 652 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 653 'uploader': 'dorappi2000',
7caf9830 654 'license': 'Standard YouTube License',
be49068d 655 'formats': 'mincount:32',
da77d856 656 },
2ee8f5d8 657 },
8a1a26ce
YCH
658 # DASH manifest with segment_list
659 {
660 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
661 'md5': '8ce563a1d667b599d21064e982ab9e31',
662 'info_dict': {
663 'id': 'CsmdDsKjzN8',
664 'ext': 'mp4',
17ee98e1 665 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
666 'uploader': 'Airtek',
667 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
668 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 669 'license': 'Standard YouTube License',
8a1a26ce
YCH
670 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
671 },
672 'params': {
673 'youtube_include_dash_manifest': True,
674 'format': '135', # bestvideo
be49068d
S
675 },
676 'skip': 'This live event has ended.',
2ee8f5d8 677 },
cf7e015f
S
678 {
679 # Multifeed videos (multiple cameras), URL is for Main Camera
680 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
681 'info_dict': {
682 'id': 'jqWvoWXjCVs',
683 'title': 'teamPGP: Rocket League Noob Stream',
684 'description': 'md5:dc7872fb300e143831327f1bae3af010',
685 },
686 'playlist': [{
687 'info_dict': {
688 'id': 'jqWvoWXjCVs',
689 'ext': 'mp4',
690 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
691 'description': 'md5:dc7872fb300e143831327f1bae3af010',
692 'upload_date': '20150721',
693 'uploader': 'Beer Games Beer',
694 'uploader_id': 'beergamesbeer',
fd050249 695 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 696 'license': 'Standard YouTube License',
cf7e015f
S
697 },
698 }, {
699 'info_dict': {
700 'id': '6h8e8xoXJzg',
701 'ext': 'mp4',
702 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
703 'description': 'md5:dc7872fb300e143831327f1bae3af010',
704 'upload_date': '20150721',
705 'uploader': 'Beer Games Beer',
706 'uploader_id': 'beergamesbeer',
fd050249 707 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 708 'license': 'Standard YouTube License',
cf7e015f
S
709 },
710 }, {
711 'info_dict': {
712 'id': 'PUOgX5z9xZw',
713 'ext': 'mp4',
714 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
715 'description': 'md5:dc7872fb300e143831327f1bae3af010',
716 'upload_date': '20150721',
717 'uploader': 'Beer Games Beer',
718 'uploader_id': 'beergamesbeer',
fd050249 719 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 720 'license': 'Standard YouTube License',
cf7e015f
S
721 },
722 }, {
723 'info_dict': {
724 'id': 'teuwxikvS5k',
725 'ext': 'mp4',
726 'title': 'teamPGP: Rocket League Noob Stream (zim)',
727 'description': 'md5:dc7872fb300e143831327f1bae3af010',
728 'upload_date': '20150721',
729 'uploader': 'Beer Games Beer',
730 'uploader_id': 'beergamesbeer',
fd050249 731 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 732 'license': 'Standard YouTube License',
cf7e015f
S
733 },
734 }],
735 'params': {
736 'skip_download': True,
737 },
cbaed4bb 738 },
f9f49d87
S
739 {
740 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
741 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
742 'info_dict': {
743 'id': 'gVfLd0zydlo',
744 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
745 },
746 'playlist_count': 2,
be49068d 747 'skip': 'Not multifeed anymore',
f9f49d87 748 },
cbaed4bb
S
749 {
750 'url': 'http://vid.plus/FlRa-iH7PGw',
751 'only_matching': True,
0e49d9a6 752 },
6d4fc66b
S
753 {
754 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
755 'only_matching': True,
756 },
0e49d9a6 757 {
61f92af1 758 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
759 # Also tests cut-off URL expansion in video description (see
760 # https://github.com/rg3/youtube-dl/issues/1892,
761 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
762 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
763 'info_dict': {
764 'id': 'lsguqyKfVQg',
765 'ext': 'mp4',
766 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 767 'alt_title': 'Dark Walk',
0e49d9a6
LL
768 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
769 'upload_date': '20151119',
770 'uploader_id': 'IronSoulElf',
fd050249 771 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 772 'uploader': 'IronSoulElf',
7caf9830 773 'license': 'Standard YouTube License',
0cb58b02 774 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
775 },
776 'params': {
777 'skip_download': True,
778 },
779 },
61f92af1
S
780 {
781 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
782 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
783 'only_matching': True,
784 },
313dfc45
LL
785 {
786 # Video with yt:stretch=17:0
787 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
788 'info_dict': {
789 'id': 'Q39EVAstoRM',
790 'ext': 'mp4',
791 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
792 'description': 'md5:ee18a25c350637c8faff806845bddee9',
793 'upload_date': '20151107',
794 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
795 'uploader': 'CH GAMER DROID',
796 },
797 'params': {
798 'skip_download': True,
799 },
be49068d 800 'skip': 'This video does not exist.',
313dfc45 801 },
7caf9830
S
802 {
803 # Video licensed under Creative Commons
804 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
805 'info_dict': {
806 'id': 'M4gD1WSo5mA',
807 'ext': 'mp4',
808 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
809 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
810 'upload_date': '20150127',
811 'uploader_id': 'BerkmanCenter',
fd050249 812 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
7caf9830
S
813 'uploader': 'BerkmanCenter',
814 'license': 'Creative Commons Attribution license (reuse allowed)',
815 },
816 'params': {
817 'skip_download': True,
818 },
819 },
fd050249
S
820 {
821 # Channel-like uploader_url
822 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
823 'info_dict': {
824 'id': 'eQcmzGIKrzg',
825 'ext': 'mp4',
826 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
827 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
828 'upload_date': '20151119',
829 'uploader': 'Bernie 2016',
830 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
831 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
832 'license': 'Creative Commons Attribution license (reuse allowed)',
833 },
834 'params': {
835 'skip_download': True,
836 },
837 },
040ac686
S
838 {
839 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
840 'only_matching': True,
7f29cf54
S
841 },
842 {
843 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
844 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
845 'only_matching': True,
040ac686 846 }
2eb88d95
PH
847 ]
848
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature, keyed by (player_url, signature cache id)
    self._player_cache = dict()
e0df6211 852
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage is about to be downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 856
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
860
def report_unavailable_format(self, video_id, format):
    """Print a notice that the requested format is not available for the video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
864
def report_rtmp_download(self):
    """Print a notice that an RTMP download was detected."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 868
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
872
873 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 874 id_m = re.match(
50f84a9a 875 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
cf010131 876 player_url)
c081b35c
PH
877 if not id_m:
878 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
879 player_type = id_m.group('ext')
880 player_id = id_m.group('id')
881
c4417ddb 882 # Read from filesystem cache
60064c53
PH
883 func_id = '%s_%s_%s' % (
884 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 885 assert os.path.basename(func_id) == func_id
a0e07d31 886
69ea8ca4 887 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 888 if cache_spec is not None:
78caa52a 889 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 890
6d1a55a5
PH
891 download_note = (
892 'Downloading player %s' % player_url
893 if self._downloader.params.get('verbose') else
894 'Downloading %s player %s' % (player_type, player_id)
895 )
e0df6211
PH
896 if player_type == 'js':
897 code = self._download_webpage(
898 player_url, video_id,
6d1a55a5 899 note=download_note,
69ea8ca4 900 errnote='Download of %s failed' % player_url)
83799698 901 res = self._parse_sig_js(code)
c4417ddb 902 elif player_type == 'swf':
e0df6211
PH
903 urlh = self._request_webpage(
904 player_url, video_id,
6d1a55a5 905 note=download_note,
69ea8ca4 906 errnote='Download of %s failed' % player_url)
e0df6211 907 code = urlh.read()
83799698 908 res = self._parse_sig_swf(code)
e0df6211
PH
909 else:
910 assert False, 'Invalid player type %r' % player_type
911
785521bf
PH
912 test_string = ''.join(map(compat_chr, range(len(example_sig))))
913 cache_res = res(test_string)
914 cache_spec = [ord(c) for c in cache_res]
83799698 915
69ea8ca4 916 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
917 return res
918
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to func for signatures shaped like example_sig.

    func is applied to a probe string whose characters encode their own
    positions; the resulting index sequence is rendered as a sum of
    s[...] index and slice expressions and written with to_screen.
    """
    def _slice_expr(first, last, stride):
        # Render one run of consecutive indices as a slice of s
        head = '' if first == 0 else str(first)
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (head, tail, stride_part)

    def _sig_terms(idxs):
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                # Inside a run: it either continues or ends at prev
                if delta != step:
                    yield _slice_expr(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                # A new ascending/descending run starts at prev
                step = delta
                start = prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _slice_expr(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(_sig_terms(permutation))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 957
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a one-argument function wrapping the signature function found in jscode."""
    funcname = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def _decipher(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return _decipher
966
def _parse_sig_swf(self, file_contents):
    """Return a one-argument function wrapping SignatureDecipher.decipher from the SWF."""
    TARGET_CLASSNAME = 'SignatureDecipher'
    swfi = SWFInterpreter(file_contents)
    searched_class = swfi.extract_class(TARGET_CLASSNAME)
    initial_function = swfi.extract_function(searched_class, 'decipher')

    def _decipher(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return _decipher
973
83799698 974 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 975 """Turn the encrypted s field into a working signature"""
6b37f0be 976
c8bf86d5 977 if player_url is None:
69ea8ca4 978 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 979
69ea8ca4 980 if player_url.startswith('//'):
78caa52a 981 player_url = 'https:' + player_url
c8bf86d5 982 try:
62af3a0e 983 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
984 if player_id not in self._player_cache:
985 func = self._extract_signature_function(
60064c53 986 video_id, player_url, s
c8bf86d5
PH
987 )
988 self._player_cache[player_id] = func
989 func = self._player_cache[player_id]
990 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 991 self._print_sig_code(func, s)
c8bf86d5
PH
992 return func(s)
993 except Exception as e:
994 tb = traceback.format_exc()
995 raise ExtractorError(
78caa52a 996 'Signature extraction failed: ' + tb, cause=e)
e0df6211 997
360e1ca5 998 def _get_subtitles(self, video_id, webpage):
de7f3446 999 try:
60e47a26 1000 subs_doc = self._download_xml(
38c2e5b8 1001 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1002 video_id, note=False)
1003 except ExtractorError as err:
9b9c5355 1004 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1005 return {}
de7f3446
JMF
1006
1007 sub_lang_list = {}
60e47a26
JMF
1008 for track in subs_doc.findall('track'):
1009 lang = track.attrib['lang_code']
7e660ac1
LD
1010 if lang in sub_lang_list:
1011 continue
360e1ca5 1012 sub_formats = []
23d17e4b 1013 for ext in self._SUBTITLE_FORMATS:
15707c7e 1014 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1015 'lang': lang,
1016 'v': video_id,
1017 'fmt': ext,
1018 'name': track.attrib['name'].encode('utf-8'),
1019 })
1020 sub_formats.append({
1021 'url': 'https://www.youtube.com/api/timedtext?' + params,
1022 'ext': ext,
1023 })
1024 sub_lang_list[lang] = sub_formats
de7f3446 1025 if not sub_lang_list:
69ea8ca4 1026 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1027 return {}
1028 return sub_lang_list
1029
a72778d3
S
1030 def _get_ytplayer_config(self, video_id, webpage):
1031 patterns = (
526b3b07
S
1032 # User data may contain arbitrary character sequences that may affect
1033 # JSON extraction with regex, e.g. when '};' is contained the second
1034 # regex won't capture the whole JSON. Yet working around by trying more
1035 # concrete regex first keeping in mind proper quoted string handling
1036 # to be implemented in future that will replace this workaround (see
1037 # https://github.com/rg3/youtube-dl/issues/7468,
1038 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1039 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1040 r';ytplayer\.config\s*=\s*({.+?});',
1041 )
1042 config = self._search_regex(
1043 patterns, webpage, 'ytplayer.config', default=None)
1044 if config:
1045 return self._parse_json(
1046 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1047
360e1ca5 1048 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1049 """We need the webpage for getting the captions url, pass it as an
1050 argument to speed up the process."""
69ea8ca4 1051 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1052 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1053 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1054 if not player_config:
de7f3446
JMF
1055 self._downloader.report_warning(err_msg)
1056 return {}
de7f3446 1057 try:
0792d563 1058 args = player_config['args']
b78b292f
S
1059 caption_url = args.get('ttsurl')
1060 if caption_url:
1061 timestamp = args['timestamp']
1062 # We get the available subtitles
15707c7e 1063 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1064 'type': 'list',
1065 'tlangs': 1,
1066 'asrs': 1,
1067 })
1068 list_url = caption_url + '&' + list_params
1069 caption_list = self._download_xml(list_url, video_id)
1070 original_lang_node = caption_list.find('track')
1071 if original_lang_node is None:
1072 self._downloader.report_warning('Video doesn\'t have automatic captions')
1073 return {}
1074 original_lang = original_lang_node.attrib['lang_code']
1075 caption_kind = original_lang_node.attrib.get('kind', '')
1076
1077 sub_lang_list = {}
1078 for lang_node in caption_list.findall('target'):
1079 sub_lang = lang_node.attrib['lang_code']
1080 sub_formats = []
1081 for ext in self._SUBTITLE_FORMATS:
15707c7e 1082 params = compat_urllib_parse_urlencode({
b78b292f
S
1083 'lang': original_lang,
1084 'tlang': sub_lang,
1085 'fmt': ext,
1086 'ts': timestamp,
1087 'kind': caption_kind,
1088 })
1089 sub_formats.append({
1090 'url': caption_url + '&' + params,
1091 'ext': ext,
1092 })
1093 sub_lang_list[sub_lang] = sub_formats
1094 return sub_lang_list
1095
1096 # Some videos don't provide ttsurl but rather caption_tracks and
1097 # caption_translation_languages (e.g. 20LmZk1hakA)
1098 caption_tracks = args['caption_tracks']
1099 caption_translation_languages = args['caption_translation_languages']
1100 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
15707c7e 1101 parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
b78b292f 1102 caption_qs = compat_parse_qs(parsed_caption_url.query)
055e6f36
JMF
1103
1104 sub_lang_list = {}
b78b292f
S
1105 for lang in caption_translation_languages.split(','):
1106 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1107 sub_lang = lang_qs.get('lc', [None])[0]
1108 if not sub_lang:
1109 continue
360e1ca5 1110 sub_formats = []
23d17e4b 1111 for ext in self._SUBTITLE_FORMATS:
b78b292f
S
1112 caption_qs.update({
1113 'tlang': [sub_lang],
1114 'fmt': [ext],
360e1ca5 1115 })
b78b292f 1116 sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
15707c7e 1117 query=compat_urllib_parse_urlencode(caption_qs, True)))
360e1ca5 1118 sub_formats.append({
b78b292f 1119 'url': sub_url,
360e1ca5
JMF
1120 'ext': ext,
1121 })
1122 sub_lang_list[sub_lang] = sub_formats
055e6f36 1123 return sub_lang_list
de7f3446
JMF
1124 # An extractor error can be raise by the download process if there are
1125 # no automatic captions but there are subtitles
1126 except (KeyError, ExtractorError):
1127 self._downloader.report_warning(err_msg)
1128 return {}
1129
d77ab8e2
S
1130 def _mark_watched(self, video_id, video_info):
1131 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1132 if not playback_url:
1133 return
1134 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1135 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1136
1137 # cpn generation algorithm is reverse engineered from base.js.
1138 # In fact it works even with dummy cpn.
1139 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1140 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1141
1142 qs.update({
1143 'ver': ['2'],
1144 'cpn': [cpn],
1145 })
1146 playback_url = compat_urlparse.urlunparse(
15707c7e 1147 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1148
1149 self._download_webpage(
1150 playback_url, video_id, 'Marking watched',
1151 'Unable to mark watched', fatal=False)
1152
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of cls._VALID_URL matched against url.

    Raises ExtractorError if url does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return mobj.group(2)
1160
1d043b93
JMF
1161 def _extract_from_m3u8(self, manifest_url, video_id):
1162 url_map = {}
5f6a1245 1163
1d043b93
JMF
1164 def _get_urls(_manifest):
1165 lines = _manifest.split('\n')
1166 urls = filter(lambda l: l and not l.startswith('#'),
8bcc8756 1167 lines)
1d043b93 1168 return urls
78caa52a 1169 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1d043b93
JMF
1170 formats_urls = _get_urls(manifest)
1171 for format_url in formats_urls:
890f62e8 1172 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
1173 url_map[itag] = format_url
1174 return url_map
1175
1fb07d10
JG
1176 def _extract_annotations(self, video_id):
1177 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 1178 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 1179
c5e8d7af 1180 def _real_extract(self, url):
cf7e015f
S
1181 url, smuggled_data = unsmuggle_url(url, {})
1182
7e8c0af0 1183 proto = (
78caa52a
PH
1184 'http' if self._downloader.params.get('prefer_insecure', False)
1185 else 'https')
7e8c0af0 1186
7c80519c 1187 start_time = None
297a564b 1188 end_time = None
7c80519c
JMF
1189 parsed_url = compat_urllib_parse_urlparse(url)
1190 for component in [parsed_url.fragment, parsed_url.query]:
1191 query = compat_parse_qs(component)
297a564b 1192 if start_time is None and 't' in query:
7c80519c 1193 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1194 if start_time is None and 'start' in query:
1195 start_time = parse_duration(query['start'][0])
297a564b
JMF
1196 if end_time is None and 'end' in query:
1197 end_time = parse_duration(query['end'][0])
7c80519c 1198
c5e8d7af
PH
1199 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1200 mobj = re.search(self._NEXT_URL_RE, url)
1201 if mobj:
7fd002c0 1202 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1203 video_id = self.extract_id(url)
c5e8d7af
PH
1204
1205 # Get video webpage
aa79ac0c 1206 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1207 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1208
1209 # Attempt to extract SWF player URL
e0df6211 1210 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1211 if mobj is not None:
1212 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1213 else:
1214 player_url = None
1215
d8d24a92
S
1216 dash_mpds = []
1217
1218 def add_dash_mpd(video_info):
1219 dash_mpd = video_info.get('dashmpd')
1220 if dash_mpd and dash_mpd[0] not in dash_mpds:
1221 dash_mpds.append(dash_mpd[0])
1222
c5e8d7af 1223 # Get video info
6449cd80 1224 embed_webpage = None
2fe1ff85 1225 is_live = None
c108eb73 1226 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1227 age_gate = True
1228 # We simulate the access to the video from www.youtube.com/v/{video_id}
1229 # this can be viewed without login into Youtube
beb95e77
CL
1230 url = proto + '://www.youtube.com/embed/%s' % video_id
1231 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1232 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1233 'video_id': video_id,
1234 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1235 'sts': self._search_regex(
beb95e77 1236 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1237 })
7e8c0af0 1238 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1239 video_info_webpage = self._download_webpage(
1240 video_info_url, video_id,
20436c30 1241 note='Refetching age-gated info webpage',
94bd3613 1242 errnote='unable to download video info webpage')
c5e8d7af 1243 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1244 add_dash_mpd(video_info)
c108eb73
JMF
1245 else:
1246 age_gate = False
bc93bdb5 1247 video_info = None
d8d24a92 1248 # Try looking directly into the video webpage
a72778d3
S
1249 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1250 if ytplayer_config:
4e62ebe2 1251 args = ytplayer_config['args']
d8d24a92
S
1252 if args.get('url_encoded_fmt_stream_map'):
1253 # Convert to the same format returned by compat_parse_qs
1254 video_info = dict((k, [v]) for k, v in args.items())
1255 add_dash_mpd(video_info)
2fe1ff85
JMF
1256 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1257 is_live = True
0a3cf9ad
S
1258 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1259 # We also try looking in get_video_info since it may contain different dashmpd
1260 # URL that points to a DASH manifest with possibly different itag set (some itags
1261 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1262 # manifest pointed by get_video_info's dashmpd).
1263 # The general idea is to take a union of itags of both DASH manifests (for example
1264 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1265 self.report_video_info_webpage_download(video_id)
0a3cf9ad 1266 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
810fb84d
PH
1267 video_info_url = (
1268 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1269 % (proto, video_id, el_type))
1270 video_info_webpage = self._download_webpage(
1271 video_info_url,
4e62ebe2
JMF
1272 video_id, note=False,
1273 errnote='unable to download video info webpage')
0a3cf9ad 1274 get_video_info = compat_parse_qs(video_info_webpage)
87dc4511
JMF
1275 if get_video_info.get('use_cipher_signature') != ['True']:
1276 add_dash_mpd(get_video_info)
0a3cf9ad
S
1277 if not video_info:
1278 video_info = get_video_info
1279 if 'token' in get_video_info:
89ea063e
S
1280 # Different get_video_info requests may report different results, e.g.
1281 # some may report video unavailability, but some may serve it without
1282 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1283 # the original webpage as well as el=info and el=embedded get_video_info
1284 # requests report video unavailability due to geo restriction while
1285 # el=detailpage succeeds and returns valid data). This is probably
1286 # due to YouTube measures against IP ranges of hosting providers.
1287 # Working around by preferring the first succeeded video_info containing
1288 # the token if no such video_info yet was found.
44b2264f
S
1289 if 'token' not in video_info:
1290 video_info = get_video_info
4e62ebe2 1291 break
c5e8d7af
PH
1292 if 'token' not in video_info:
1293 if 'reason' in video_info:
af214c3a
YCH
1294 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1295 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
678e436f 1296 if regions_allowed:
af214c3a
YCH
1297 raise ExtractorError('YouTube said: This video is available in %s only' % (
1298 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1299 expected=True)
d11271dd 1300 raise ExtractorError(
78caa52a 1301 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1302 expected=True, video_id=video_id)
c5e8d7af 1303 else:
d11271dd 1304 raise ExtractorError(
78caa52a 1305 '"token" parameter not in video info for unknown reason',
d11271dd 1306 video_id=video_id)
c5e8d7af 1307
cf7e015f
S
1308 # title
1309 if 'title' in video_info:
1310 video_title = video_info['title'][0]
1311 else:
1312 self._downloader.report_warning('Unable to extract video title')
1313 video_title = '_'
1314
1315 # description
1316 video_description = get_element_by_id("eow-description", video_webpage)
1317 if video_description:
1318 video_description = re.sub(r'''(?x)
1319 <a\s+
25cb7a0e 1320 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1321 (?:title|href)="([^"]+)"\s+
25cb7a0e 1322 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1323 class="[^"]*"[^>]*>
23f13e97 1324 [^<]+\.{3}\s*
cf7e015f
S
1325 </a>
1326 ''', r'\1', video_description)
1327 video_description = clean_html(video_description)
1328 else:
1329 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1330 if fd_mobj:
1331 video_description = unescapeHTML(fd_mobj.group(1))
1332 else:
1333 video_description = ''
1334
5e1eddb9
S
1335 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1336 if not self._downloader.params.get('noplaylist'):
1337 entries = []
1338 feed_ids = []
6863631c 1339 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1340 for feed in multifeed_metadata_list.split(','):
6863631c
S
1341 # Unquote should take place before split on comma (,) since textual
1342 # fields may contain comma as well (see
1343 # https://github.com/rg3/youtube-dl/issues/8536)
1344 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1345 entries.append({
1346 '_type': 'url_transparent',
1347 'ie_key': 'Youtube',
1348 'url': smuggle_url(
1349 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1350 {'force_singlefeed': True}),
1351 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1352 })
1353 feed_ids.append(feed_data['id'][0])
1354 self.to_screen(
1355 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1356 % (', '.join(feed_ids), video_id))
1357 return self.playlist_result(entries, video_id, video_title, video_description)
1358 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1359
1d699755
PH
1360 if 'view_count' in video_info:
1361 view_count = int(video_info['view_count'][0])
1362 else:
1363 view_count = None
1364
c5e8d7af
PH
1365 # Check for "rental" videos
1366 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
69ea8ca4 1367 raise ExtractorError('"rental" videos not supported')
c5e8d7af
PH
1368
1369 # Start extracting information
1370 self.report_information_extraction(video_id)
1371
1372 # uploader
1373 if 'author' not in video_info:
69ea8ca4 1374 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1375 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1376
1377 # uploader_id
1378 video_uploader_id = None
fd050249
S
1379 video_uploader_url = None
1380 mobj = re.search(
1381 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1382 video_webpage)
c5e8d7af 1383 if mobj is not None:
fd050249
S
1384 video_uploader_id = mobj.group('uploader_id')
1385 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1386 else:
69ea8ca4 1387 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1388
c5e8d7af 1389 # thumbnail image
7763b04e
JMF
1390 # We try first to get a high quality image:
1391 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1392 video_webpage, re.DOTALL)
1393 if m_thumb is not None:
1394 video_thumbnail = m_thumb.group(1)
1395 elif 'thumbnail_url' not in video_info:
69ea8ca4 1396 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1397 video_thumbnail = None
c5e8d7af 1398 else: # don't panic if we can't find it
7fd002c0 1399 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1400
1401 # upload date
9d0b581f
S
1402 upload_date = self._html_search_meta(
1403 'datePublished', video_webpage, 'upload date', default=None)
1404 if not upload_date:
1405 upload_date = self._search_regex(
1406 [r'(?s)id="eow-date.*?>(.*?)</span>',
1407 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1408 video_webpage, 'upload date', default=None)
1409 if upload_date:
1410 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1411 upload_date = unified_strdate(upload_date)
c5e8d7af 1412
7caf9830
S
1413 video_license = self._html_search_regex(
1414 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1415 video_webpage, 'license', default=None)
1416
0cb58b02
S
1417 m_music = re.search(
1418 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1419 video_webpage)
1420 if m_music:
1421 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1422 video_creator = clean_html(m_music.group('creator'))
1423 else:
1424 video_alt_title = video_creator = None
1425
55f7bd2d
PH
1426 m_cat_container = self._search_regex(
1427 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1428 video_webpage, 'categories', default=None)
ec8deefc 1429 if m_cat_container:
ad3bc6ac 1430 category = self._html_search_regex(
01ed5c9b 1431 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1432 default=None)
1433 video_categories = None if category is None else [category]
1434 else:
1435 video_categories = None
ec8deefc 1436
000b6b5a
S
1437 video_tags = [
1438 unescapeHTML(m.group('content'))
1439 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1440
f30a38be 1441 def _extract_count(count_name):
c93d53f5
S
1442 return str_to_int(self._search_regex(
1443 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1444 % re.escape(count_name),
1445 video_webpage, count_name, default=None))
1446
69ea8ca4
PH
1447 like_count = _extract_count('like')
1448 dislike_count = _extract_count('dislike')
336c3a69 1449
c5e8d7af 1450 # subtitles
d82134c3 1451 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1452 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af
PH
1453
1454 if 'length_seconds' not in video_info:
69ea8ca4 1455 self._downloader.report_warning('unable to extract video duration')
b466b702 1456 video_duration = None
c5e8d7af 1457 else:
7fd002c0 1458 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 1459
1fb07d10
JG
1460 # annotations
1461 video_annotations = None
1462 if self._downloader.params.get('writeannotations', False):
5f6a1245 1463 video_annotations = self._extract_annotations(video_id)
1fb07d10 1464
dd27fd17
PH
1465 def _map_to_format_list(urlmap):
1466 formats = []
1467 for itag, video_real_url in urlmap.items():
1468 dct = {
1469 'format_id': itag,
1470 'url': video_real_url,
1471 'player_url': player_url,
1472 }
0b65e5d4
PH
1473 if itag in self._formats:
1474 dct.update(self._formats[itag])
dd27fd17
PH
1475 formats.append(dct)
1476 return formats
1477
c5e8d7af
PH
1478 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1479 self.report_rtmp_download()
dd27fd17
PH
1480 formats = [{
1481 'format_id': '_rtmp',
1482 'protocol': 'rtmp',
1483 'url': video_info['conn'][0],
1484 'player_url': player_url,
1485 }]
24270b03 1486 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1487 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1488 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1489 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1490 formats_spec = {}
82156fdb 1491 fmt_list = video_info.get('fmt_list', [''])[0]
1492 if fmt_list:
1493 for fmt in fmt_list.split(','):
1494 spec = fmt.split('/')
3318832e 1495 if len(spec) > 1:
1496 width_height = spec[1].split('x')
1497 if len(width_height) == 2:
1498 formats_spec[spec[0]] = {
1499 'resolution': spec[1],
1500 'width': int_or_none(width_height[0]),
1501 'height': int_or_none(width_height[1]),
1502 }
c9afb51c 1503 formats = []
00fe14fc 1504 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1505 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1506 if 'itag' not in url_data or 'url' not in url_data:
1507 continue
1508 format_id = url_data['itag'][0]
1509 url = url_data['url'][0]
1510
1511 if 'sig' in url_data:
1512 url += '&signature=' + url_data['sig'][0]
1513 elif 's' in url_data:
1514 encrypted_sig = url_data['s'][0]
6449cd80 1515 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
201e9eaa 1516
beb95e77 1517 jsplayer_url_json = self._search_regex(
6449cd80
PH
1518 ASSETS_RE,
1519 embed_webpage if age_gate else video_webpage,
1520 'JS player URL (1)', default=None)
1521 if not jsplayer_url_json and not age_gate:
1522 # We need the embed website after all
1523 if embed_webpage is None:
1524 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1525 embed_webpage = self._download_webpage(
1526 embed_url, video_id, 'Downloading embed webpage')
1527 jsplayer_url_json = self._search_regex(
1528 ASSETS_RE, embed_webpage, 'JS player URL')
1529
beb95e77 1530 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1531 if player_url is None:
1532 player_url_json = self._search_regex(
1533 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1534 video_webpage, 'age gate player URL')
201e9eaa
PH
1535 player_url = json.loads(player_url_json)
1536
1537 if self._downloader.params.get('verbose'):
cf010131 1538 if player_url is None:
201e9eaa
PH
1539 player_version = 'unknown'
1540 player_desc = 'unknown'
1541 else:
1542 if player_url.endswith('swf'):
1543 player_version = self._search_regex(
1544 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1545 'flash player', fatal=False)
201e9eaa 1546 player_desc = 'flash player %s' % player_version
cf010131 1547 else:
201e9eaa 1548 player_version = self._search_regex(
50f84a9a 1549 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
201e9eaa
PH
1550 player_url,
1551 'html5 player', fatal=False)
78caa52a 1552 player_desc = 'html5 player %s' % player_version
201e9eaa 1553
60064c53 1554 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1555 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1556 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1557
1558 signature = self._decrypt_signature(
1559 encrypted_sig, video_id, player_url, age_gate)
1560 url += '&signature=' + signature
1561 if 'ratebypass' not in url:
1562 url += '&ratebypass=yes'
c9afb51c 1563
94278f72
YCH
1564 dct = {
1565 'format_id': format_id,
1566 'url': url,
1567 'player_url': player_url,
1568 }
1569 if format_id in self._formats:
1570 dct.update(self._formats[format_id])
3318832e 1571 if format_id in formats_spec:
1572 dct.update(formats_spec[format_id])
94278f72 1573
aabc2be6
S
1574 # Some itags are not included in DASH manifest thus corresponding formats will
1575 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1576 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1577 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1578 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1579
1580 more_fields = {
c9afb51c 1581 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1582 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1583 'width': width,
1584 'height': height,
1585 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1586 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1587 }
94278f72
YCH
1588 for key, value in more_fields.items():
1589 if value:
1590 dct[key] = value
aabc2be6
S
1591 type_ = url_data.get('type', [None])[0]
1592 if type_:
1593 type_split = type_.split(';')
1594 kind_ext = type_split[0].split('/')
1595 if len(kind_ext) == 2:
94278f72
YCH
1596 kind, _ = kind_ext
1597 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1598 if kind in ('audio', 'video'):
1599 codecs = None
1600 for mobj in re.finditer(
1601 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1602 if mobj.group('key') == 'codecs':
1603 codecs = mobj.group('val')
1604 break
1605 if codecs:
1606 codecs = codecs.split(',')
1607 if len(codecs) == 2:
cc28492d 1608 acodec, vcodec = codecs[1], codecs[0]
aabc2be6
S
1609 else:
1610 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1611 dct.update({
1612 'acodec': acodec,
1613 'vcodec': vcodec,
1614 })
aabc2be6 1615 formats.append(dct)
1d043b93
JMF
1616 elif video_info.get('hlsvp'):
1617 manifest_url = video_info['hlsvp'][0]
1618 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 1619 formats = _map_to_format_list(url_map)
ac5a69af
YCH
1620 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1621 for a_format in formats:
049d71d8 1622 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
c5e8d7af 1623 else:
8ceabd4d
S
1624 unavailable_message = self._html_search_regex(
1625 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1626 video_webpage, 'unavailable message', default=None)
1627 if unavailable_message:
1628 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1629 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1630
dd27fd17 1631 # Look for the DASH manifest
203fb43f 1632 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1633 dash_mpd_fatal = True
8ff648e4 1634 for mpd_url in dash_mpds:
d8d24a92 1635 dash_formats = {}
774e208f 1636 try:
05d0d131
YCH
1637 def decrypt_sig(mobj):
1638 s = mobj.group(1)
1639 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1640 return '/signature/%s' % dec_s
1641
8ff648e4 1642 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1643
8ff648e4 1644 for df in self._extract_mpd_formats(
1645 mpd_url, video_id, fatal=dash_mpd_fatal,
1646 formats_dict=self._formats):
d8d24a92
S
1647 # Do not overwrite DASH format found in some previous DASH manifest
1648 if df['format_id'] not in dash_formats:
1649 dash_formats[df['format_id']] = df
77c6fb5b
S
1650 # Additional DASH manifests may end up in HTTP Error 403 therefore
1651 # allow them to fail without bug report message if we already have
1652 # some DASH manifest succeeded. This is temporary workaround to reduce
1653 # burst of bug reports until we figure out the reason and whether it
1654 # can be fixed at all.
1655 dash_mpd_fatal = False
774e208f
PH
1656 except (ExtractorError, KeyError) as e:
1657 self.report_warning(
1658 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1659 if dash_formats:
04b3b3df
JMF
1660 # Remove the formats we found through non-DASH, they
1661 # contain less info and it can be wrong, because we use
1662 # fixed values (for example the resolution). See
1663 # https://github.com/rg3/youtube-dl/issues/5774 for an
1664 # example.
d80265cc 1665 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1666 formats.extend(dash_formats.values())
d80044c2 1667
6271f1ca
PH
1668 # Check for malformed aspect ratio
1669 stretched_m = re.search(
1670 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1671 video_webpage)
1672 if stretched_m:
313dfc45
LL
1673 w = float(stretched_m.group('w'))
1674 h = float(stretched_m.group('h'))
5faf9fed
S
1675 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1676 # We will only process correct ratios.
313dfc45 1677 if w > 0 and h > 0:
41f24c32 1678 ratio = w / h
313dfc45
LL
1679 for f in formats:
1680 if f.get('vcodec') != 'none':
1681 f['stretched_ratio'] = ratio
6271f1ca 1682
4bcc7bd1 1683 self._sort_formats(formats)
4ea3be0a 1684
d77ab8e2
S
1685 self.mark_watched(video_id, video_info)
1686
4ea3be0a 1687 return {
8bcc8756
JW
1688 'id': video_id,
1689 'uploader': video_uploader,
1690 'uploader_id': video_uploader_id,
fd050249 1691 'uploader_url': video_uploader_url,
8bcc8756 1692 'upload_date': upload_date,
7caf9830 1693 'license': video_license,
0cb58b02 1694 'creator': video_creator,
8bcc8756 1695 'title': video_title,
0cb58b02 1696 'alt_title': video_alt_title,
8bcc8756
JW
1697 'thumbnail': video_thumbnail,
1698 'description': video_description,
1699 'categories': video_categories,
000b6b5a 1700 'tags': video_tags,
8bcc8756 1701 'subtitles': video_subtitles,
360e1ca5 1702 'automatic_captions': automatic_captions,
8bcc8756
JW
1703 'duration': video_duration,
1704 'age_limit': 18 if age_gate else 0,
1705 'annotations': video_annotations,
7e8c0af0 1706 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1707 'view_count': view_count,
4ea3be0a 1708 'like_count': like_count,
1709 'dislike_count': dislike_count,
2d30521a 1710 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1711 'formats': formats,
2fe1ff85 1712 'is_live': is_live,
7c80519c 1713 'start_time': start_time,
297a564b 1714 'end_time': end_time,
4ea3be0a 1715 }
c5e8d7af 1716
5f6a1245 1717
class YoutubeSharedVideoIE(InfoExtractor):
    """Resolve youtube.com/shared?ci=... links to the video they point at."""

    IE_NAME = 'youtube:shared'
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result that delegates to YoutubeIE.

        The id matched from the URL (the ``ci`` value) is only used to label
        the download; the id handed on is read from the page's ``videoId``
        meta tag, and the lookup is fatal if that tag is missing.
        """
        shared_id = self._match_id(url)
        shared_page = self._download_webpage(url, shared_id)

        target_id = self._html_search_meta(
            'videoId', shared_page, 'YouTube video id', fatal=True)
        return self.url_result(target_id, YoutubeIE.ie_key())
1749
1750
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, given as a URL or as a bare playlist id.

    Ids starting with 'RD', 'UL' or 'PU' are handled as mixes (see
    _extract_mix); everything else is read from the regular playlist page.
    """
    IE_DESC = 'YouTube.com playlists'
    # Group 1 is the playlist id taken from a URL, group 2 a bare playlist id.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?[&;])*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Key was previously misspelled ('playlist_mincout'), so this
        # expectation was never actually checked.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before extraction (delegates to the inherited _login)."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix.

        Watch pages are requested repeatedly, each time starting from the last
        video id collected so far, until a page contributes no id that has
        not been seen before. The title is taken from the last page fetched.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Try the known title containers in order; the first non-empty wins.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return a playlist result for it.

        Raises ExtractorError (expected) when the page carries an alert saying
        the playlist does not exist / is private, or that the parameters are
        invalid. Other alerts are only reported as warnings, except the
        language chooser which is ignored.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Honour --no-playlist for URLs that also name a single video.

        Returns a url_result for the video when the URL has a ``v`` query
        parameter and the 'noplaylist' option is set; otherwise returns None
        (after printing a hint if a video id was present).
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return None

    def _real_extract(self, url):
        """Dispatch to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1934
c5e8d7af 1935
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel (/channel/<id> URLs)."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the URL of the channel's videos listing."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return the channel's videos, preferring its uploads playlist."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        probe_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        uploads_source_id = False
        if probe_page is not False:
            uploads_source_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not uploads_source_id:
                # No channelId meta tag: try the app deep-link meta tags instead.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    uploads_source_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if uploads_source_id and uploads_source_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to get the playlist id to delegate to.
            uploads_playlist = 'UU' + uploads_source_id[2:]
            playlist_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_playlist)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface any alert
        # message shown on the page as the error.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2025
2026
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for a user's videos (/user/, /c/, bare name or ytuser: key)."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only if no other Youtube*IE in this module does."""
        # This extractor's regex is very permissive and would also match URLs
        # that belong to the other youtube extractors, so give every other
        # module-level Youtube*IE class the first chance to claim the URL.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos listing URL, keeping the URL's 'user' or 'c' kind.

        When the URL carries no explicit kind (bare name or ytuser: key),
        'user' is used.
        """
        mobj = re.match(self._VALID_URL, url)
        kind = mobj.group('user') or 'user'
        name = mobj.group('id')
        return self._TEMPLATE_URL % (kind, name)
2077
b05654f0 2078
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for /user/<name>/live and /channel/<id>/live URLs."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            # Raw string: the pattern contains regex escapes ('\.') that are
            # invalid escape sequences in a regular string literal.
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video, or for the base URL.

        The /live page is downloaded non-fatally. If its og:type is 'video'
        and its 'videoId' meta tag holds an 11-character video id, the result
        delegates to YoutubeIE. In every other case (download failed, no
        video on the page) the result points at the user/channel base URL.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default=None)
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2122
2123
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a user or channel.

    Only declares the URL pattern and tests here; the extraction logic is
    not defined in this class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
0c148415
S
2152
2153
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor for the 'ytsearch' key."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments added to every results request;
    # subclasses override this to change the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested one after another until a page yields no
        videos or at least ``n`` videos have been collected; the list is then
        cut down to ``n`` entries and returned as a playlist result titled
        with the query.

        Raises ExtractorError (expected) when a downloaded page contains a
        'search-message' element.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are available. Using '>=' avoids
            # downloading one more page (which could even fail with a
            # "No video results" error) when exactly n videos were found.
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2197
c9ae7b95 2198
# Variant of YoutubeSearchIE that adds a 'search_sort' query argument so the
# results come back ordered by upload date.
class YoutubeSearchDateIE(YoutubeSearchIE):
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
75dff0ee 2204
c9ae7b95 2205
# Handles regular youtube.com/results?... URLs: the page is downloaded as-is
# and the videos found on it are returned as a playlist titled with the query.
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the videos listed on the search page at ``url``."""
        # The query is URL-encoded inside the address ('+' stands for a
        # space); decode it so it can serve as a readable id and title.
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2227
2228
# Shows are extracted by delegating to the parent playlists extractor with
# the show's "/playlists" page.
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Extract the show at ``url`` via its playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2246
2247
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is the path component used in https://www.youtube.com/feed/<name>
    and in the extractor name; _PLAYLIST_TITLE is the title given to the
    resulting playlist.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Return a playlist with every video id found in the feed.

        The feed page is downloaded first; further portions are then fetched
        by following the "load more" link until it disappears or a portion
        contributes no video id that has not been seen already.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            #
            # This has to be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never trigger and the loop would never end.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2295
2296
# The watch later list is extracted as the playlist with the fixed id 'WL'.
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video picked by the helper, else the 'WL' playlist."""
        # NOTE(review): _check_download_just_video presumably yields a result
        # only when just the video (not the whole list) is wanted - confirm
        # against its definition.
        single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            return self._extract_playlist('WL')
        return single_video
f459d170 2315
5f6a1245 2316
# Resolves the logged-in user's favourites page to its playlist id and hands
# that id over to the YoutubePlaylist extractor.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url_result pointing at the favourites playlist."""
        favourites_url = 'https://www.youtube.com/my_favorites'
        webpage = self._download_webpage(
            favourites_url, 'Youtube Favourites videos')
        # The playlist id is taken from the first "list=" parameter on the page.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
2327
2328
25f14e9f
S
# Feed extractor configured for https://www.youtube.com/feed/recommended.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1ed5b5c9 2334
1ed5b5c9 2335
25f14e9f
S
# Feed extractor configured for https://www.youtube.com/feed/subscriptions.
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # The description names the shortcut with its leading colon because
    # _VALID_URL only accepts ':ytsubs', not a bare 'ytsubs'.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 2341
1ed5b5c9 2342
25f14e9f
S
# Feed extractor configured for https://www.youtube.com/feed/history.
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1ed5b5c9
JMF
2348
2349
15870e90
PH
# Matches watch/attribution URLs that end without a video id and answers
# them with a hint about shell quoting instead of attempting an extraction.
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the quoting problem."""
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
772fd5cc
PH
2397
2398
# Matches watch URLs whose v= parameter is 1 to 10 characters long and
# reports them as truncated instead of attempting an extraction.
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)