]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
release 2017.01.25
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
c5e8d7af 19 compat_parse_qs,
7fd002c0
S
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
c5e8d7af 25 compat_str,
4bb4a188
PH
26)
27from ..utils import (
c5e8d7af 28 clean_html,
9b9c5355 29 error_to_compat_str,
c5e8d7af 30 ExtractorError,
2d30521a 31 float_or_none,
4bb4a188
PH
32 get_element_by_attribute,
33 get_element_by_id,
dd27fd17 34 int_or_none,
94278f72 35 mimetype2ext,
4bb4a188 36 orderedSet,
7c80519c 37 parse_duration,
0cb58b02 38 remove_quotes,
041bc3ad 39 remove_start,
5c2266df 40 sanitized_Request,
cf7e015f 41 smuggle_url,
c93d53f5 42 str_to_int,
c5e8d7af
PH
43 unescapeHTML,
44 unified_strdate,
cf7e015f 45 unsmuggle_url,
81c2f20b 46 uppercase_escape,
6e6bc8da 47 urlencode_postdata,
af214c3a 48 ISO3166Utils,
c5e8d7af
PH
49)
50
5f6a1245 51
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (value includes ``hl=en``)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in *ids*."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An ExtractorError is also raised when the password page reports an error.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure explicitly, as the docstring promises
            # (previously a bare ``return`` yielded None here).
            return False

        # Start from the hidden inputs of the login page and add the credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'checkConnection': 'youtube',
            'Email': username,
            'Passwd': password,
        })

        login_results = self._download_webpage(
            self._PASSWORD_CHALLENGE_URL, None,
            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form))
        if login_results is False:
            return False

        # An error element with text carries the message to show to the user
        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text was captured above
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # Note the space before the parenthesis: the two literals are
                # concatenated verbatim.
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Drop a leading 'G-' from the code before submitting it
            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Being shown the challenge form again means the code was rejected
            if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Still seeing the login form in the password response: credentials rejected
        if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; no-op without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The login outcome is not acted upon here: nothing follows either way.
        self._login()
c5e8d7af 170
8377574c 171
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of *page* and of every follow-up "load more" page.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched as JSON from the URL found in the
        ``data-uix-load-more-href`` attribute of the current widget HTML.
        Iteration ends when no such attribute is found or when the fetched
        content is blank.
        """
        content_html = page
        more_widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if load_more is None:
                return

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            more = self._download_json(
                more_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            more_widget_html = more['load_more_widget_html']
194
061a75ed
S
195
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every video found in *content*."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs for the matches of self._VIDEO_RE in *page*.

        Each video id appears once, in order of first occurrence. When an id
        is matched several times, the first non-empty title seen for it wins.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps a video id to its position in the two lists above, so repeated
        # ids are found without rescanning ids_in_page for every match.
        position_of = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', with '0';
            # possibly a typo - confirm against _VIDEO_RE before changing.
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_of.get(video_id)
            if idx is None:
                position_of[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                # Fill in a title that was missing on the earlier match
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)
220
221
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result per distinct playlist id linked in *content*."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return a playlist result built from its paged entries."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        # The title lookup is non-fatal
        page_title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, list_id)
        return self.playlist_result(entries, list_id, page_title)
0c148415
S
235
236
360e1ca5 237class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 238 IE_DESC = 'YouTube.com'
cb7dfeea 239 _VALID_URL = r"""(?x)^
c5e8d7af 240 (
edb53e2d 241 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 242 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 243 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 244 (?:www\.)?pwnyoutube\.com/|
f7000f3a 245 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
246 tube\.majestyc\.net/|
247 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
248 (?:.*?\#/)? # handle anchor (#/) redirect urls
249 (?: # the various things that can precede the ID:
ac7553d0 250 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 251 |(?: # or the v= param in all its forms
f7000f3a 252 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 253 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 254 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
255 v=
256 )
f4b05232 257 ))
cbaed4bb
S
258 |(?:
259 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
260 vid\.plus| # or vid.plus/xxxx
261 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 262 )/
edb53e2d 263 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 264 )
c5e8d7af 265 )? # all until now is optional -> you can pass the naked ID
8963d9c2 266 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
feaa5ad7 267 (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
268 (?(1).+)? # if we found the ID, everything can follow
269 $"""
c5e8d7af 270 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 271 _formats = {
c2d3cb4c 272 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
273 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
274 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
275 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
276 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
277 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
278 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
279 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 280 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 281 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
282 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
283 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
284 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
285 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
286 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 287 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 288 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
289 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 290
291
292 # 3D videos
c2d3cb4c 293 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
294 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
295 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
296 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 297 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
298 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
299 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 300
96fb5605 301 # Apple HTTP Live Streaming
11f12195 302 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 303 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
304 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
305 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
306 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
307 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 308 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
309 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
310
311 # DASH mp4 video
c2d3cb4c 312 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
313 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
314 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
315 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
316 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
317 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
318 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
8409b368 319 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
a6c2c244
YCH
320 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
321 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
322 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
323 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
836a086c 324
f6f1fc92 325 # Dash mp4 audio
c2d3cb4c 326 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
327 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
328 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
2c347352
S
329 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
330 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
331
332 # Dash webm
a6c2c244
YCH
333 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
334 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
335 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
336 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
337 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
338 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
339 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
340 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
341 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
342 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
343 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
344 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
345 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
346 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
347 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
4c6b4764 348 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
a6c2c244
YCH
349 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
350 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
351 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
352 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
353 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
354 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
2c62dc26
PH
355
356 # Dash webm audio
a6c2c244
YCH
357 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
358 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 359
0857baad 360 # Dash webm audio with opus inside
a6c2c244
YCH
361 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
362 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
363 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
0857baad 364
ce6b9a2d
PH
365 # RTMP (unnamed)
366 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 367 }
23d17e4b 368 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 369
78caa52a 370 IE_NAME = 'youtube'
2eb88d95
PH
371 _TESTS = [
372 {
2d3d2997 373 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
374 'info_dict': {
375 'id': 'BaW_jenozKc',
376 'ext': 'mp4',
377 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
378 'uploader': 'Philipp Hagemeister',
379 'uploader_id': 'phihag',
ec85ded8 380 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 381 'upload_date': '20121002',
7caf9830 382 'license': 'Standard YouTube License',
4bc3a23e
PH
383 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
384 'categories': ['Science & Technology'],
000b6b5a 385 'tags': ['youtube-dl'],
3e7c1224
PH
386 'like_count': int,
387 'dislike_count': int,
7c80519c 388 'start_time': 1,
297a564b 389 'end_time': 9,
2eb88d95 390 }
0e853ca4 391 },
0e853ca4 392 {
2d3d2997 393 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
394 'note': 'Test generic use_cipher_signature video (#897)',
395 'info_dict': {
396 'id': 'UxxajLWwzqY',
397 'ext': 'mp4',
398 'upload_date': '20120506',
399 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 400 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 401 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
402 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
403 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
404 'iconic ep', 'iconic', 'love', 'it'],
4bc3a23e
PH
405 'uploader': 'Icona Pop',
406 'uploader_id': 'IconaPop',
ec85ded8 407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 408 'license': 'Standard YouTube License',
0cb58b02 409 'creator': 'Icona Pop',
2eb88d95 410 }
c108eb73
JMF
411 },
412 {
4bc3a23e
PH
413 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
414 'note': 'Test VEVO video with age protection (#956)',
415 'info_dict': {
416 'id': '07FYdnEawAQ',
417 'ext': 'mp4',
418 'upload_date': '20130703',
419 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 420 'alt_title': 'Tunnel Vision',
4bc3a23e
PH
421 'description': 'md5:64249768eec3bc4276236606ea996373',
422 'uploader': 'justintimberlakeVEVO',
423 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 424 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 425 'license': 'Standard YouTube License',
0cb58b02 426 'creator': 'Justin Timberlake',
34952f09 427 'age_limit': 18,
c108eb73
JMF
428 }
429 },
fccd3771 430 {
4bc3a23e
PH
431 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
432 'note': 'Embed-only video (#1746)',
433 'info_dict': {
434 'id': 'yZIXLfi8CZQ',
435 'ext': 'mp4',
436 'upload_date': '20120608',
437 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
438 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
439 'uploader': 'SET India',
94bfcd23 440 'uploader_id': 'setindia',
ec85ded8 441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 442 'license': 'Standard YouTube License',
94bfcd23 443 'age_limit': 18,
fccd3771
PH
444 }
445 },
11b56058 446 {
2d3d2997 447 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
448 'note': 'Use the first video ID in the URL',
449 'info_dict': {
450 'id': 'BaW_jenozKc',
451 'ext': 'mp4',
452 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
453 'uploader': 'Philipp Hagemeister',
454 'uploader_id': 'phihag',
ec85ded8 455 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 456 'upload_date': '20121002',
7caf9830 457 'license': 'Standard YouTube License',
11b56058
PM
458 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
459 'categories': ['Science & Technology'],
460 'tags': ['youtube-dl'],
461 'like_count': int,
462 'dislike_count': int,
34a7de29
S
463 },
464 'params': {
465 'skip_download': True,
466 },
11b56058 467 },
dd27fd17 468 {
2d3d2997 469 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
470 'note': '256k DASH audio (format 141) via DASH manifest',
471 'info_dict': {
472 'id': 'a9LDPn-MO4I',
473 'ext': 'm4a',
474 'upload_date': '20121002',
475 'uploader_id': '8KVIDEO',
ec85ded8 476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
477 'description': '',
478 'uploader': '8KVIDEO',
7caf9830 479 'license': 'Standard YouTube License',
4bc3a23e 480 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 481 },
4bc3a23e
PH
482 'params': {
483 'youtube_include_dash_manifest': True,
484 'format': '141',
4919603f 485 },
de3c7fe0 486 'skip': 'format 141 not served anymore',
dd27fd17 487 },
3489b7d2
JMF
488 # DASH manifest with encrypted signature
489 {
78caa52a
PH
490 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
491 'info_dict': {
492 'id': 'IB3lcPjvWLA',
493 'ext': 'm4a',
b766eb27
JMF
494 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
495 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
78caa52a
PH
496 'uploader': 'AfrojackVEVO',
497 'uploader_id': 'AfrojackVEVO',
498 'upload_date': '20131011',
7caf9830 499 'license': 'Standard YouTube License',
3489b7d2 500 },
4bc3a23e 501 'params': {
78caa52a 502 'youtube_include_dash_manifest': True,
de3c7fe0 503 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
504 },
505 },
aaeb86f6
S
506 # JS player signature function name containing $
507 {
508 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
509 'info_dict': {
510 'id': 'nfWlot6h_JM',
511 'ext': 'm4a',
512 'title': 'Taylor Swift - Shake It Off',
0cb58b02 513 'alt_title': 'Shake It Off',
f57b7835 514 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
aaeb86f6
S
515 'uploader': 'TaylorSwiftVEVO',
516 'uploader_id': 'TaylorSwiftVEVO',
517 'upload_date': '20140818',
7caf9830 518 'license': 'Standard YouTube License',
0cb58b02 519 'creator': 'Taylor Swift',
aaeb86f6
S
520 },
521 'params': {
522 'youtube_include_dash_manifest': True,
de3c7fe0 523 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
524 },
525 },
aa79ac0c
PH
526 # Controversy video
527 {
528 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
529 'info_dict': {
530 'id': 'T4XJQO3qol8',
531 'ext': 'mp4',
532 'upload_date': '20100909',
533 'uploader': 'The Amazing Atheist',
534 'uploader_id': 'TheAmazingAtheist',
ec85ded8 535 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 536 'license': 'Standard YouTube License',
aa79ac0c
PH
537 'title': 'Burning Everyone\'s Koran',
538 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
539 }
c522adb1
JMF
540 },
541 # Normal age-gate video (No vevo, embed allowed)
542 {
2d3d2997 543 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
544 'info_dict': {
545 'id': 'HtVdAasjOgU',
546 'ext': 'mp4',
547 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 548 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
c522adb1
JMF
549 'uploader': 'The Witcher',
550 'uploader_id': 'WitcherGame',
ec85ded8 551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 552 'upload_date': '20140605',
7caf9830 553 'license': 'Standard YouTube License',
34952f09 554 'age_limit': 18,
c522adb1
JMF
555 },
556 },
fccae2b9
S
557 # Age-gate video with encrypted signature
558 {
2d3d2997 559 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
560 'info_dict': {
561 'id': '6kLq3WMV1nU',
562 'ext': 'mp4',
563 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
564 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
565 'uploader': 'LloydVEVO',
566 'uploader_id': 'LloydVEVO',
ec85ded8 567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 568 'upload_date': '20110629',
7caf9830 569 'license': 'Standard YouTube License',
34952f09 570 'age_limit': 18,
fccae2b9
S
571 },
572 },
774e208f
PH
573 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
574 {
575 'url': '__2ABJjxzNo',
576 'info_dict': {
577 'id': '__2ABJjxzNo',
578 'ext': 'mp4',
579 'upload_date': '20100430',
580 'uploader_id': 'deadmau5',
ec85ded8 581 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 582 'creator': 'deadmau5',
774e208f
PH
583 'description': 'md5:12c56784b8032162bb936a5f76d55360',
584 'uploader': 'deadmau5',
7caf9830 585 'license': 'Standard YouTube License',
774e208f 586 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 587 'alt_title': 'Some Chords',
774e208f
PH
588 },
589 'expected_warnings': [
590 'DASH manifest missing',
591 ]
e52a40ab
PH
592 },
593 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
594 {
595 'url': 'lqQg6PlCWgI',
596 'info_dict': {
597 'id': 'lqQg6PlCWgI',
598 'ext': 'mp4',
90227264 599 'upload_date': '20150827',
cbe2bd91 600 'uploader_id': 'olympic',
ec85ded8 601 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 602 'license': 'Standard YouTube License',
cbe2bd91 603 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 604 'uploader': 'Olympic',
cbe2bd91
PH
605 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
606 },
607 'params': {
608 'skip_download': 'requires avconv',
e52a40ab 609 }
cbe2bd91 610 },
6271f1ca
PH
611 # Non-square pixels
612 {
613 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
614 'info_dict': {
615 'id': '_b-2C3KPAM0',
616 'ext': 'mp4',
617 'stretched_ratio': 16 / 9.,
618 'upload_date': '20110310',
619 'uploader_id': 'AllenMeow',
ec85ded8 620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
621 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
622 'uploader': '孫艾倫',
7caf9830 623 'license': 'Standard YouTube License',
6271f1ca
PH
624 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
625 },
06b491eb
S
626 },
627 # url_encoded_fmt_stream_map is empty string
628 {
629 'url': 'qEJwOuvDf7I',
630 'info_dict': {
631 'id': 'qEJwOuvDf7I',
f57b7835 632 'ext': 'webm',
06b491eb
S
633 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
634 'description': '',
635 'upload_date': '20150404',
636 'uploader_id': 'spbelect',
637 'uploader': 'Наблюдатели Петербурга',
638 },
639 'params': {
640 'skip_download': 'requires avconv',
e323cf3f
S
641 },
642 'skip': 'This live event has ended.',
06b491eb 643 },
da77d856
S
644 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
645 {
646 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
647 'info_dict': {
648 'id': 'FIl7x6_3R5Y',
649 'ext': 'mp4',
650 'title': 'md5:7b81415841e02ecd4313668cde88737a',
651 'description': 'md5:116377fd2963b81ec4ce64b542173306',
652 'upload_date': '20150625',
653 'uploader_id': 'dorappi2000',
ec85ded8 654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 655 'uploader': 'dorappi2000',
7caf9830 656 'license': 'Standard YouTube License',
be49068d 657 'formats': 'mincount:32',
da77d856 658 },
2ee8f5d8 659 },
8a1a26ce
YCH
660 # DASH manifest with segment_list
661 {
662 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
663 'md5': '8ce563a1d667b599d21064e982ab9e31',
664 'info_dict': {
665 'id': 'CsmdDsKjzN8',
666 'ext': 'mp4',
17ee98e1 667 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
668 'uploader': 'Airtek',
669 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
670 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 671 'license': 'Standard YouTube License',
8a1a26ce
YCH
672 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
673 },
674 'params': {
675 'youtube_include_dash_manifest': True,
676 'format': '135', # bestvideo
be49068d
S
677 },
678 'skip': 'This live event has ended.',
2ee8f5d8 679 },
cf7e015f
S
680 {
681 # Multifeed videos (multiple cameras), URL is for Main Camera
682 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
683 'info_dict': {
684 'id': 'jqWvoWXjCVs',
685 'title': 'teamPGP: Rocket League Noob Stream',
686 'description': 'md5:dc7872fb300e143831327f1bae3af010',
687 },
688 'playlist': [{
689 'info_dict': {
690 'id': 'jqWvoWXjCVs',
691 'ext': 'mp4',
692 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
693 'description': 'md5:dc7872fb300e143831327f1bae3af010',
694 'upload_date': '20150721',
695 'uploader': 'Beer Games Beer',
696 'uploader_id': 'beergamesbeer',
ec85ded8 697 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 698 'license': 'Standard YouTube License',
cf7e015f
S
699 },
700 }, {
701 'info_dict': {
702 'id': '6h8e8xoXJzg',
703 'ext': 'mp4',
704 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
705 'description': 'md5:dc7872fb300e143831327f1bae3af010',
706 'upload_date': '20150721',
707 'uploader': 'Beer Games Beer',
708 'uploader_id': 'beergamesbeer',
ec85ded8 709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 710 'license': 'Standard YouTube License',
cf7e015f
S
711 },
712 }, {
713 'info_dict': {
714 'id': 'PUOgX5z9xZw',
715 'ext': 'mp4',
716 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
717 'description': 'md5:dc7872fb300e143831327f1bae3af010',
718 'upload_date': '20150721',
719 'uploader': 'Beer Games Beer',
720 'uploader_id': 'beergamesbeer',
ec85ded8 721 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 722 'license': 'Standard YouTube License',
cf7e015f
S
723 },
724 }, {
725 'info_dict': {
726 'id': 'teuwxikvS5k',
727 'ext': 'mp4',
728 'title': 'teamPGP: Rocket League Noob Stream (zim)',
729 'description': 'md5:dc7872fb300e143831327f1bae3af010',
730 'upload_date': '20150721',
731 'uploader': 'Beer Games Beer',
732 'uploader_id': 'beergamesbeer',
ec85ded8 733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 734 'license': 'Standard YouTube License',
cf7e015f
S
735 },
736 }],
737 'params': {
738 'skip_download': True,
739 },
cbaed4bb 740 },
f9f49d87
S
741 {
742 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
743 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
744 'info_dict': {
745 'id': 'gVfLd0zydlo',
746 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
747 },
748 'playlist_count': 2,
be49068d 749 'skip': 'Not multifeed anymore',
f9f49d87 750 },
cbaed4bb 751 {
2d3d2997 752 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 753 'only_matching': True,
0e49d9a6 754 },
6d4fc66b 755 {
2d3d2997 756 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
757 'only_matching': True,
758 },
0e49d9a6 759 {
61f92af1 760 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
761 # Also tests cut-off URL expansion in video description (see
762 # https://github.com/rg3/youtube-dl/issues/1892,
763 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
764 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
765 'info_dict': {
766 'id': 'lsguqyKfVQg',
767 'ext': 'mp4',
768 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 769 'alt_title': 'Dark Walk',
0e49d9a6
LL
770 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
771 'upload_date': '20151119',
772 'uploader_id': 'IronSoulElf',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 774 'uploader': 'IronSoulElf',
7caf9830 775 'license': 'Standard YouTube License',
0cb58b02 776 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
777 },
778 'params': {
779 'skip_download': True,
780 },
781 },
61f92af1
S
782 {
783 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
784 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
785 'only_matching': True,
786 },
313dfc45
LL
787 {
788 # Video with yt:stretch=17:0
789 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
790 'info_dict': {
791 'id': 'Q39EVAstoRM',
792 'ext': 'mp4',
793 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
794 'description': 'md5:ee18a25c350637c8faff806845bddee9',
795 'upload_date': '20151107',
796 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
797 'uploader': 'CH GAMER DROID',
798 },
799 'params': {
800 'skip_download': True,
801 },
be49068d 802 'skip': 'This video does not exist.',
313dfc45 803 },
7caf9830
S
804 {
805 # Video licensed under Creative Commons
806 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
807 'info_dict': {
808 'id': 'M4gD1WSo5mA',
809 'ext': 'mp4',
810 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
811 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
812 'upload_date': '20150127',
813 'uploader_id': 'BerkmanCenter',
ec85ded8 814 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
7caf9830
S
815 'uploader': 'BerkmanCenter',
816 'license': 'Creative Commons Attribution license (reuse allowed)',
817 },
818 'params': {
819 'skip_download': True,
820 },
821 },
fd050249
S
822 {
823 # Channel-like uploader_url
824 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
825 'info_dict': {
826 'id': 'eQcmzGIKrzg',
827 'ext': 'mp4',
828 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
829 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
830 'upload_date': '20151119',
831 'uploader': 'Bernie 2016',
832 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 833 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
834 'license': 'Creative Commons Attribution license (reuse allowed)',
835 },
836 'params': {
837 'skip_download': True,
838 },
839 },
040ac686
S
840 {
841 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
842 'only_matching': True,
7f29cf54
S
843 },
844 {
845 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
846 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
847 'only_matching': True,
6496ccb4
S
848 },
849 {
850 # Rental video preview
851 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
852 'info_dict': {
853 'id': 'uGpuVWrhIzE',
854 'ext': 'mp4',
855 'title': 'Piku - Trailer',
856 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
857 'upload_date': '20150811',
858 'uploader': 'FlixMatrix',
859 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 860 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
861 'license': 'Standard YouTube License',
862 },
863 'params': {
864 'skip_download': True,
865 },
022a5d66 866 },
12afdc2a
S
867 {
868 # YouTube Red video with episode data
869 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
870 'info_dict': {
871 'id': 'iqKdEhx-dD4',
872 'ext': 'mp4',
873 'title': 'Isolation - Mind Field (Ep 1)',
874 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822',
875 'upload_date': '20170118',
876 'uploader': 'Vsauce',
877 'uploader_id': 'Vsauce',
878 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
879 'license': 'Standard YouTube License',
880 'series': 'Mind Field',
881 'season_number': 1,
882 'episode_number': 1,
883 },
884 'params': {
885 'skip_download': True,
886 },
887 'expected_warnings': [
888 'Skipping DASH manifest',
889 ],
890 },
022a5d66
S
891 {
892 # itag 212
893 'url': '1t24XAntNCY',
894 'only_matching': True,
040ac686 895 }
2eb88d95
PH
896 ]
897
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature player cache."""
    # All arguments are handed to the parent initialiser unchanged.
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Populated by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
e0df6211 901
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is about to be downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 905
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce that video information is about to be extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
909
def report_unavailable_format(self, video_id, format):
    """Announce that the requested format is not available for the video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
913
def report_rtmp_download(self):
    """Announce that the download is going to use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 917
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated signature parts."""
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))
60064c53
PH
921
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The player type and id are parsed out of player_url.  If the
    'youtube-sigfuncs' cache already holds a character permutation for
    this player and signature shape, a function applying that permutation
    is returned.  Otherwise the player is downloaded, parsed into a
    decipher function, the permutation it performs on a probe string is
    stored in the cache, and the parsed function is returned.

    Raises ExtractorError when the player cannot be identified from the
    URL, when the derived cache id is not a plain base name, or when the
    player type is neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Checked explicitly rather than with assert: assert statements are
    # removed when Python runs with -O, which would silently drop this
    # guard on the cache key.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec lists, for each output position, the input index
        # whose character belongs there.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Previously "assert False"; under -O that fell through and failed
        # later on the unbound name "res".
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a probe string whose characters encode their own
    # positions, so the result can be stored as a plain index list.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
967
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is applied to a probe string whose characters encode their own
    positions; the resulting index list is compressed into a sum of
    's[i]' and 's[a:b:c]' expressions and shown via to_screen, guarded by
    a check on the lengths of the dot-separated parts of example_sig.
    """
    def gen_sig_code(idxs):
        """Yield index/slice expressions that reproduce idxs in order."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else '%d' % start
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # Nothing to emit for an empty index list.
        if not idxs:
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i so that a single-element list, for which the pairwise
        # loop below never runs, still emits its only index instead of
        # failing on an unbound loop variable.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current +1/-1 run.
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # An empty result would otherwise leave a dangling "return".
    expr_code = ' + '.join(gen_sig_code(cache_spec)) or "''"
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1006
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a one-argument decipher function extracted from JS player code."""
    # The function name is whatever follows ".sig||" in the player source.
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    sig_func = JSInterpreter(jscode).extract_function(sig_func_name)

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return sig_func([s])

    return decipher
1015
def _parse_sig_swf(self, file_contents):
    """Return a one-argument decipher function extracted from an SWF player."""
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    sig_func = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return sig_func([s])

    return decipher
1022
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    Raises ExtractorError when player_url is None, or (with the formatted
    traceback in the message) when anything goes wrong while obtaining or
    running the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Protocol-relative URLs are completed with https.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        message = 'Signature extraction failed: ' + traceback.format_exc()
        raise ExtractorError(message, cause=e)
e0df6211 1046
def _get_subtitles(self, video_id, webpage):
    """Return the subtitle tracks listed for video_id.

    The result maps each language code to a list of {'url', 'ext'} dicts,
    one per entry of _SUBTITLE_FORMATS.  An empty dict is returned, after
    a warning, when the track list cannot be downloaded or is empty.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        track_list = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        # Only the first track seen for a language is kept.
        if lang in subtitles:
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning("video doesn't have subtitles")
    return {}
1078
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config JSON from webpage.

    Returns None when no config is found in the page.
    """
    # User data may contain arbitrary character sequences that can upset
    # regex-based JSON extraction; for instance, when '};' is present the
    # second pattern will not capture the whole JSON.  As a workaround the
    # more specific pattern is tried first, until proper quoted string
    # handling replaces it (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1096
def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic caption formats found via the watch page.

    The webpage holds the captions URL, so it is passed in rather than
    fetched again.  The result maps language codes to lists of
    {'url', 'ext'} dicts; an empty dict is returned, after a warning,
    when no automatic captions can be found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = "Couldn't find automatic captions for %s" % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # Fetch the list of available subtitles
            listing = self._download_xml(
                tts_url + '&' + compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                }),
                video_id)
            source_track = listing.find('track')
            if source_track is None:
                self._downloader.report_warning("Video doesn't have automatic captions")
                return {}
            source_lang = source_track.attrib['lang_code']
            source_kind = source_track.attrib.get('kind', '')

            captions = {}
            for target in listing.findall('target'):
                target_lang = target.attrib['lang_code']
                captions[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse_urlencode({
                        'lang': source_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': source_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return captions

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        track_spec = args['caption_tracks']
        translation_spec = args['caption_translation_languages']
        base_url = compat_parse_qs(track_spec.split(',')[0])['u'][0]
        base_parts = compat_urllib_parse_urlparse(base_url)
        query = compat_parse_qs(base_parts.query)

        captions = {}
        for encoded_lang in translation_spec.split(','):
            lang_fields = compat_parse_qs(compat_urllib_parse_unquote_plus(encoded_lang))
            lang_code = lang_fields.get('lc', [None])[0]
            if not lang_code:
                continue
            formats = []
            for ext in self._SUBTITLE_FORMATS:
                # The same query dict is reused; only these two keys vary.
                query['tlang'] = [lang_code]
                query['fmt'] = [ext]
                formats.append({
                    'url': compat_urlparse.urlunparse(base_parts._replace(
                        query=compat_urllib_parse_urlencode(query, True))),
                    'ext': ext,
                })
            captions[lang_code] = formats
        return captions
    # An extractor error can be raised by the download process if there
    # are no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1178
d77ab8e2
S
def _mark_watched(self, video_id, video_info):
    """Request the playback stats URL from video_info, if it has one.

    Does nothing when video_info carries no
    'videostats_playback_base_url'; a failed request is not fatal.
    """
    base_url = video_info.get('videostats_playback_base_url', [None])[0]
    if not base_url:
        return
    url_parts = compat_urlparse.urlparse(base_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        # NOTE(review): randint is inclusive, so 256 can be drawn; the
        # "& 63" mask keeps the index inside the 64-character alphabet.
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1201
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1209
1d043b93
JMF
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download an m3u8 manifest and return a dict of itag -> format URL."""
    manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
    itag_to_url = {}
    for entry in manifest.split('\n'):
        # Empty lines and lines starting with '#' are not format URLs.
        if not entry or entry.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', entry, 'itag')
        itag_to_url[itag] = entry
    return itag_to_url
1224
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the annotations document for video_id."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1fb07d10 1228
c5e8d7af 1229 def _real_extract(self, url):
cf7e015f
S
1230 url, smuggled_data = unsmuggle_url(url, {})
1231
7e8c0af0 1232 proto = (
78caa52a
PH
1233 'http' if self._downloader.params.get('prefer_insecure', False)
1234 else 'https')
7e8c0af0 1235
7c80519c 1236 start_time = None
297a564b 1237 end_time = None
7c80519c
JMF
1238 parsed_url = compat_urllib_parse_urlparse(url)
1239 for component in [parsed_url.fragment, parsed_url.query]:
1240 query = compat_parse_qs(component)
297a564b 1241 if start_time is None and 't' in query:
7c80519c 1242 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1243 if start_time is None and 'start' in query:
1244 start_time = parse_duration(query['start'][0])
297a564b
JMF
1245 if end_time is None and 'end' in query:
1246 end_time = parse_duration(query['end'][0])
7c80519c 1247
c5e8d7af
PH
1248 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1249 mobj = re.search(self._NEXT_URL_RE, url)
1250 if mobj:
7fd002c0 1251 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1252 video_id = self.extract_id(url)
c5e8d7af
PH
1253
1254 # Get video webpage
aa79ac0c 1255 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1256 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1257
1258 # Attempt to extract SWF player URL
e0df6211 1259 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1260 if mobj is not None:
1261 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1262 else:
1263 player_url = None
1264
d8d24a92
S
1265 dash_mpds = []
1266
1267 def add_dash_mpd(video_info):
1268 dash_mpd = video_info.get('dashmpd')
1269 if dash_mpd and dash_mpd[0] not in dash_mpds:
1270 dash_mpds.append(dash_mpd[0])
1271
c5e8d7af 1272 # Get video info
6449cd80 1273 embed_webpage = None
2fe1ff85 1274 is_live = None
c108eb73 1275 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1276 age_gate = True
1277 # We simulate the access to the video from www.youtube.com/v/{video_id}
1278 # this can be viewed without login into Youtube
beb95e77
CL
1279 url = proto + '://www.youtube.com/embed/%s' % video_id
1280 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1281 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1282 'video_id': video_id,
1283 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1284 'sts': self._search_regex(
beb95e77 1285 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1286 })
7e8c0af0 1287 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1288 video_info_webpage = self._download_webpage(
1289 video_info_url, video_id,
20436c30 1290 note='Refetching age-gated info webpage',
94bd3613 1291 errnote='unable to download video info webpage')
c5e8d7af 1292 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1293 add_dash_mpd(video_info)
c108eb73
JMF
1294 else:
1295 age_gate = False
bc93bdb5 1296 video_info = None
d8d24a92 1297 # Try looking directly into the video webpage
a72778d3
S
1298 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1299 if ytplayer_config:
4e62ebe2 1300 args = ytplayer_config['args']
d8d24a92
S
1301 if args.get('url_encoded_fmt_stream_map'):
1302 # Convert to the same format returned by compat_parse_qs
1303 video_info = dict((k, [v]) for k, v in args.items())
1304 add_dash_mpd(video_info)
6496ccb4
S
1305 # Rental video is not rented but preview is available (e.g.
1306 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1307 # https://github.com/rg3/youtube-dl/issues/10532)
1308 if not video_info and args.get('ypc_vid'):
1309 return self.url_result(
1310 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1311 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1312 is_live = True
0a3cf9ad
S
1313 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1314 # We also try looking in get_video_info since it may contain different dashmpd
1315 # URL that points to a DASH manifest with possibly different itag set (some itags
1316 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1317 # manifest pointed by get_video_info's dashmpd).
1318 # The general idea is to take a union of itags of both DASH manifests (for example
1319 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1320 self.report_video_info_webpage_download(video_id)
0a3cf9ad 1321 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
810fb84d
PH
1322 video_info_url = (
1323 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1324 % (proto, video_id, el_type))
1325 video_info_webpage = self._download_webpage(
1326 video_info_url,
4e62ebe2
JMF
1327 video_id, note=False,
1328 errnote='unable to download video info webpage')
0a3cf9ad 1329 get_video_info = compat_parse_qs(video_info_webpage)
87dc4511
JMF
1330 if get_video_info.get('use_cipher_signature') != ['True']:
1331 add_dash_mpd(get_video_info)
0a3cf9ad
S
1332 if not video_info:
1333 video_info = get_video_info
1334 if 'token' in get_video_info:
89ea063e
S
1335 # Different get_video_info requests may report different results, e.g.
1336 # some may report video unavailability, but some may serve it without
1337 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1338 # the original webpage as well as el=info and el=embedded get_video_info
1339 # requests report video unavailability due to geo restriction while
1340 # el=detailpage succeeds and returns valid data). This is probably
1341 # due to YouTube measures against IP ranges of hosting providers.
1342 # Working around by preferring the first succeeded video_info containing
1343 # the token if no such video_info yet was found.
44b2264f
S
1344 if 'token' not in video_info:
1345 video_info = get_video_info
4e62ebe2 1346 break
c5e8d7af
PH
1347 if 'token' not in video_info:
1348 if 'reason' in video_info:
af214c3a
YCH
1349 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1350 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
678e436f 1351 if regions_allowed:
af214c3a
YCH
1352 raise ExtractorError('YouTube said: This video is available in %s only' % (
1353 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1354 expected=True)
d11271dd 1355 raise ExtractorError(
78caa52a 1356 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1357 expected=True, video_id=video_id)
c5e8d7af 1358 else:
d11271dd 1359 raise ExtractorError(
78caa52a 1360 '"token" parameter not in video info for unknown reason',
d11271dd 1361 video_id=video_id)
c5e8d7af 1362
cf7e015f
S
1363 # title
1364 if 'title' in video_info:
1365 video_title = video_info['title'][0]
1366 else:
1367 self._downloader.report_warning('Unable to extract video title')
1368 video_title = '_'
1369
1370 # description
1371 video_description = get_element_by_id("eow-description", video_webpage)
1372 if video_description:
1373 video_description = re.sub(r'''(?x)
1374 <a\s+
25cb7a0e 1375 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1376 (?:title|href)="([^"]+)"\s+
25cb7a0e 1377 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1378 class="[^"]*"[^>]*>
23f13e97 1379 [^<]+\.{3}\s*
cf7e015f
S
1380 </a>
1381 ''', r'\1', video_description)
1382 video_description = clean_html(video_description)
1383 else:
1384 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1385 if fd_mobj:
1386 video_description = unescapeHTML(fd_mobj.group(1))
1387 else:
1388 video_description = ''
1389
5e1eddb9
S
1390 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1391 if not self._downloader.params.get('noplaylist'):
1392 entries = []
1393 feed_ids = []
6863631c 1394 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1395 for feed in multifeed_metadata_list.split(','):
6863631c
S
1396 # Unquote should take place before split on comma (,) since textual
1397 # fields may contain comma as well (see
1398 # https://github.com/rg3/youtube-dl/issues/8536)
1399 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1400 entries.append({
1401 '_type': 'url_transparent',
1402 'ie_key': 'Youtube',
1403 'url': smuggle_url(
1404 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1405 {'force_singlefeed': True}),
1406 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1407 })
1408 feed_ids.append(feed_data['id'][0])
1409 self.to_screen(
1410 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1411 % (', '.join(feed_ids), video_id))
1412 return self.playlist_result(entries, video_id, video_title, video_description)
1413 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1414
1d699755
PH
1415 if 'view_count' in video_info:
1416 view_count = int(video_info['view_count'][0])
1417 else:
1418 view_count = None
1419
c5e8d7af
PH
1420 # Check for "rental" videos
1421 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
69ea8ca4 1422 raise ExtractorError('"rental" videos not supported')
c5e8d7af
PH
1423
1424 # Start extracting information
1425 self.report_information_extraction(video_id)
1426
1427 # uploader
1428 if 'author' not in video_info:
69ea8ca4 1429 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1430 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1431
1432 # uploader_id
1433 video_uploader_id = None
fd050249
S
1434 video_uploader_url = None
1435 mobj = re.search(
1436 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1437 video_webpage)
c5e8d7af 1438 if mobj is not None:
fd050249
S
1439 video_uploader_id = mobj.group('uploader_id')
1440 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1441 else:
69ea8ca4 1442 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1443
c5e8d7af 1444 # thumbnail image
7763b04e
JMF
1445 # We try first to get a high quality image:
1446 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1447 video_webpage, re.DOTALL)
1448 if m_thumb is not None:
1449 video_thumbnail = m_thumb.group(1)
1450 elif 'thumbnail_url' not in video_info:
69ea8ca4 1451 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1452 video_thumbnail = None
c5e8d7af 1453 else: # don't panic if we can't find it
7fd002c0 1454 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1455
1456 # upload date
9d0b581f
S
1457 upload_date = self._html_search_meta(
1458 'datePublished', video_webpage, 'upload date', default=None)
1459 if not upload_date:
1460 upload_date = self._search_regex(
1461 [r'(?s)id="eow-date.*?>(.*?)</span>',
1462 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1463 video_webpage, 'upload date', default=None)
1464 if upload_date:
1465 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1466 upload_date = unified_strdate(upload_date)
c5e8d7af 1467
7caf9830
S
1468 video_license = self._html_search_regex(
1469 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1470 video_webpage, 'license', default=None)
1471
0cb58b02
S
1472 m_music = re.search(
1473 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1474 video_webpage)
1475 if m_music:
1476 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1477 video_creator = clean_html(m_music.group('creator'))
1478 else:
1479 video_alt_title = video_creator = None
1480
12afdc2a
S
1481 m_episode = re.search(
1482 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1483 video_webpage)
1484 if m_episode:
1485 series = m_episode.group('series')
1486 season_number = int(m_episode.group('season'))
1487 episode_number = int(m_episode.group('episode'))
1488 else:
1489 series = season_number = episode_number = None
1490
55f7bd2d
PH
1491 m_cat_container = self._search_regex(
1492 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1493 video_webpage, 'categories', default=None)
ec8deefc 1494 if m_cat_container:
ad3bc6ac 1495 category = self._html_search_regex(
01ed5c9b 1496 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1497 default=None)
1498 video_categories = None if category is None else [category]
1499 else:
1500 video_categories = None
ec8deefc 1501
000b6b5a
S
1502 video_tags = [
1503 unescapeHTML(m.group('content'))
1504 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1505
f30a38be 1506 def _extract_count(count_name):
c93d53f5
S
1507 return str_to_int(self._search_regex(
1508 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1509 % re.escape(count_name),
1510 video_webpage, count_name, default=None))
1511
69ea8ca4
PH
1512 like_count = _extract_count('like')
1513 dislike_count = _extract_count('dislike')
336c3a69 1514
c5e8d7af 1515 # subtitles
d82134c3 1516 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1517 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af
PH
1518
1519 if 'length_seconds' not in video_info:
69ea8ca4 1520 self._downloader.report_warning('unable to extract video duration')
b466b702 1521 video_duration = None
c5e8d7af 1522 else:
7fd002c0 1523 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 1524
1fb07d10
JG
1525 # annotations
1526 video_annotations = None
1527 if self._downloader.params.get('writeannotations', False):
5f6a1245 1528 video_annotations = self._extract_annotations(video_id)
1fb07d10 1529
dd27fd17
PH
1530 def _map_to_format_list(urlmap):
1531 formats = []
1532 for itag, video_real_url in urlmap.items():
1533 dct = {
1534 'format_id': itag,
1535 'url': video_real_url,
1536 'player_url': player_url,
1537 }
0b65e5d4
PH
1538 if itag in self._formats:
1539 dct.update(self._formats[itag])
dd27fd17
PH
1540 formats.append(dct)
1541 return formats
1542
c5e8d7af
PH
1543 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1544 self.report_rtmp_download()
dd27fd17
PH
1545 formats = [{
1546 'format_id': '_rtmp',
1547 'protocol': 'rtmp',
1548 'url': video_info['conn'][0],
1549 'player_url': player_url,
1550 }]
24270b03 1551 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1552 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1553 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1554 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1555 formats_spec = {}
82156fdb 1556 fmt_list = video_info.get('fmt_list', [''])[0]
1557 if fmt_list:
1558 for fmt in fmt_list.split(','):
1559 spec = fmt.split('/')
3318832e 1560 if len(spec) > 1:
1561 width_height = spec[1].split('x')
1562 if len(width_height) == 2:
1563 formats_spec[spec[0]] = {
1564 'resolution': spec[1],
1565 'width': int_or_none(width_height[0]),
1566 'height': int_or_none(width_height[1]),
1567 }
c9afb51c 1568 formats = []
00fe14fc 1569 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1570 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1571 if 'itag' not in url_data or 'url' not in url_data:
1572 continue
1573 format_id = url_data['itag'][0]
1574 url = url_data['url'][0]
1575
1576 if 'sig' in url_data:
1577 url += '&signature=' + url_data['sig'][0]
1578 elif 's' in url_data:
1579 encrypted_sig = url_data['s'][0]
6449cd80 1580 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
201e9eaa 1581
beb95e77 1582 jsplayer_url_json = self._search_regex(
6449cd80
PH
1583 ASSETS_RE,
1584 embed_webpage if age_gate else video_webpage,
1585 'JS player URL (1)', default=None)
1586 if not jsplayer_url_json and not age_gate:
1587 # We need the embed website after all
1588 if embed_webpage is None:
1589 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1590 embed_webpage = self._download_webpage(
1591 embed_url, video_id, 'Downloading embed webpage')
1592 jsplayer_url_json = self._search_regex(
1593 ASSETS_RE, embed_webpage, 'JS player URL')
1594
beb95e77 1595 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1596 if player_url is None:
1597 player_url_json = self._search_regex(
1598 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1599 video_webpage, 'age gate player URL')
201e9eaa
PH
1600 player_url = json.loads(player_url_json)
1601
1602 if self._downloader.params.get('verbose'):
cf010131 1603 if player_url is None:
201e9eaa
PH
1604 player_version = 'unknown'
1605 player_desc = 'unknown'
1606 else:
1607 if player_url.endswith('swf'):
1608 player_version = self._search_regex(
1609 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1610 'flash player', fatal=False)
201e9eaa 1611 player_desc = 'flash player %s' % player_version
cf010131 1612 else:
201e9eaa 1613 player_version = self._search_regex(
50f84a9a 1614 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
201e9eaa
PH
1615 player_url,
1616 'html5 player', fatal=False)
78caa52a 1617 player_desc = 'html5 player %s' % player_version
201e9eaa 1618
60064c53 1619 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1620 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1621 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1622
1623 signature = self._decrypt_signature(
1624 encrypted_sig, video_id, player_url, age_gate)
1625 url += '&signature=' + signature
1626 if 'ratebypass' not in url:
1627 url += '&ratebypass=yes'
c9afb51c 1628
94278f72
YCH
1629 dct = {
1630 'format_id': format_id,
1631 'url': url,
1632 'player_url': player_url,
1633 }
1634 if format_id in self._formats:
1635 dct.update(self._formats[format_id])
3318832e 1636 if format_id in formats_spec:
1637 dct.update(formats_spec[format_id])
94278f72 1638
aabc2be6
S
1639 # Some itags are not included in DASH manifest thus corresponding formats will
1640 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1641 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1642 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1643 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1644
1645 more_fields = {
c9afb51c 1646 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1647 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1648 'width': width,
1649 'height': height,
1650 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1651 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1652 }
94278f72
YCH
1653 for key, value in more_fields.items():
1654 if value:
1655 dct[key] = value
aabc2be6
S
1656 type_ = url_data.get('type', [None])[0]
1657 if type_:
1658 type_split = type_.split(';')
1659 kind_ext = type_split[0].split('/')
1660 if len(kind_ext) == 2:
94278f72
YCH
1661 kind, _ = kind_ext
1662 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1663 if kind in ('audio', 'video'):
1664 codecs = None
1665 for mobj in re.finditer(
1666 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1667 if mobj.group('key') == 'codecs':
1668 codecs = mobj.group('val')
1669 break
1670 if codecs:
1671 codecs = codecs.split(',')
1672 if len(codecs) == 2:
cc28492d 1673 acodec, vcodec = codecs[1], codecs[0]
aabc2be6
S
1674 else:
1675 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1676 dct.update({
1677 'acodec': acodec,
1678 'vcodec': vcodec,
1679 })
aabc2be6 1680 formats.append(dct)
1d043b93
JMF
1681 elif video_info.get('hlsvp'):
1682 manifest_url = video_info['hlsvp'][0]
1683 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 1684 formats = _map_to_format_list(url_map)
ac5a69af
YCH
1685 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1686 for a_format in formats:
049d71d8 1687 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
c5e8d7af 1688 else:
8ceabd4d
S
1689 unavailable_message = self._html_search_regex(
1690 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1691 video_webpage, 'unavailable message', default=None)
1692 if unavailable_message:
1693 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1694 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1695
dd27fd17 1696 # Look for the DASH manifest
203fb43f 1697 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1698 dash_mpd_fatal = True
8ff648e4 1699 for mpd_url in dash_mpds:
d8d24a92 1700 dash_formats = {}
774e208f 1701 try:
05d0d131
YCH
1702 def decrypt_sig(mobj):
1703 s = mobj.group(1)
1704 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1705 return '/signature/%s' % dec_s
1706
8ff648e4 1707 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1708
8ff648e4 1709 for df in self._extract_mpd_formats(
1710 mpd_url, video_id, fatal=dash_mpd_fatal,
1711 formats_dict=self._formats):
d8d24a92
S
1712 # Do not overwrite DASH format found in some previous DASH manifest
1713 if df['format_id'] not in dash_formats:
1714 dash_formats[df['format_id']] = df
77c6fb5b
S
1715 # Additional DASH manifests may end up in HTTP Error 403 therefore
1716 # allow them to fail without bug report message if we already have
1717 # some DASH manifest succeeded. This is temporary workaround to reduce
1718 # burst of bug reports until we figure out the reason and whether it
1719 # can be fixed at all.
1720 dash_mpd_fatal = False
774e208f
PH
1721 except (ExtractorError, KeyError) as e:
1722 self.report_warning(
1723 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1724 if dash_formats:
04b3b3df
JMF
1725 # Remove the formats we found through non-DASH, they
1726 # contain less info and it can be wrong, because we use
1727 # fixed values (for example the resolution). See
1728 # https://github.com/rg3/youtube-dl/issues/5774 for an
1729 # example.
d80265cc 1730 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1731 formats.extend(dash_formats.values())
d80044c2 1732
6271f1ca
PH
1733 # Check for malformed aspect ratio
1734 stretched_m = re.search(
1735 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1736 video_webpage)
1737 if stretched_m:
313dfc45
LL
1738 w = float(stretched_m.group('w'))
1739 h = float(stretched_m.group('h'))
5faf9fed
S
1740 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1741 # We will only process correct ratios.
313dfc45 1742 if w > 0 and h > 0:
41f24c32 1743 ratio = w / h
313dfc45
LL
1744 for f in formats:
1745 if f.get('vcodec') != 'none':
1746 f['stretched_ratio'] = ratio
6271f1ca 1747
4bcc7bd1 1748 self._sort_formats(formats)
4ea3be0a 1749
d77ab8e2
S
1750 self.mark_watched(video_id, video_info)
1751
4ea3be0a 1752 return {
8bcc8756
JW
1753 'id': video_id,
1754 'uploader': video_uploader,
1755 'uploader_id': video_uploader_id,
fd050249 1756 'uploader_url': video_uploader_url,
8bcc8756 1757 'upload_date': upload_date,
7caf9830 1758 'license': video_license,
0cb58b02 1759 'creator': video_creator,
8bcc8756 1760 'title': video_title,
0cb58b02 1761 'alt_title': video_alt_title,
8bcc8756
JW
1762 'thumbnail': video_thumbnail,
1763 'description': video_description,
1764 'categories': video_categories,
000b6b5a 1765 'tags': video_tags,
8bcc8756 1766 'subtitles': video_subtitles,
360e1ca5 1767 'automatic_captions': automatic_captions,
8bcc8756
JW
1768 'duration': video_duration,
1769 'age_limit': 18 if age_gate else 0,
1770 'annotations': video_annotations,
7e8c0af0 1771 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1772 'view_count': view_count,
4ea3be0a 1773 'like_count': like_count,
1774 'dislike_count': dislike_count,
2d30521a 1775 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1776 'formats': formats,
2fe1ff85 1777 'is_live': is_live,
7c80519c 1778 'start_time': start_time,
297a564b 1779 'end_time': end_time,
12afdc2a
S
1780 'series': series,
1781 'season_number': season_number,
1782 'episode_number': episode_number,
4ea3be0a 1783 }
c5e8d7af 1784
5f6a1245 1785
class YoutubeSharedVideoIE(InfoExtractor):
    """Extractor for youtube.com/shared?ci=... links.

    The share page is downloaded, the id of the actual video is read from
    its "videoId" meta tag and extraction is handed over to YoutubeIE.
    """

    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:shared'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the video behind a share link."""
        # The id matched by _VALID_URL identifies the share, not the video.
        share_id = self._match_id(url)
        share_page = self._download_webpage(url, share_id)

        # The id of the actual video is published in the "videoId" meta
        # tag; the lookup is requested with fatal=True.
        target_video_id = self._html_search_meta(
            'videoId', share_page, 'YouTube video id', fatal=True)

        return self.url_result(target_video_id, YoutubeIE.ie_key())
1817
1818
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, including mixes.

    Accepts playlist URLs as well as bare playlist ids (see _VALID_URL).
    When the URL also names a single video, --no-playlist selects that
    video instead of the playlist.
    """

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login step (self._login()) before any extraction."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Collect the videos of a mix by walking its watch pages.

        Mixes are generated from a single video; the playlist id is just
        'RD' + video_id, so the last 11 characters of the id seed the walk.
        Returns a playlist result titled with the text of the first title
        element found on the last downloaded page.
        """
        # The pattern depends only on the playlist id, so build it once.
        # Verbose mode: the line break and indentation inside are ignored.
        mix_link_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            page_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                page_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = [
                found_id
                for found_id in orderedSet(re.findall(mix_link_re, webpage))
                if found_id not in collected_ids]
            if not unseen_ids:
                break
            collected_ids.extend(unseen_ids)
            # Continue the walk from the newest video found so far.
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Try the known title containers in order of preference; if none
        # yields anything, the result of the last lookup is kept.
        title_html = None
        for css_class in ('playlist-title', 'title long-title', 'title'):
            title_html = get_element_by_attribute('class', css_class, webpage)
            if title_html:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_html))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build a playlist result from it.

        Returns a (has_videos, playlist_result) tuple.  has_videos is False
        only when the page has no playlist title and yields no entries.
        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or the parameters are invalid.
        """
        page = self._download_webpage(
            self._TEMPLATE_URL % playlist_id, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for raw_alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = raw_alert.strip()

            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)

            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)

            # The language chooser banner is not worth reporting.
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue

            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            # Probe for a first entry; a unique sentinel marks "no entries".
            nothing = object()
            has_videos = next(self._entries(page, playlist_id), nothing) is not nothing

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether a single video should be fetched instead of the playlist.

        Returns (video_id, url_result) when the URL names a video and the
        'noplaylist' option is set, (video_id, None) when it names a video
        but the playlist is wanted, and (None, None) otherwise.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            # Short youtu.be links carry the video id in the path instead.
            video_id = self._search_regex(
                r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if not self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
            return video_id, None

        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    def _real_extract(self, url):
        """Extract a playlist, a mix, or fall back to the single video in the URL."""
        # Extract playlist id
        id_match = re.match(self._VALID_URL, url)
        if id_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = id_match.group(1) or id_match.group(2)

        video_id, single_video = self._check_download_just_video(url, playlist_id)
        if single_video:
            return single_video

        # Mixes require a custom extraction process
        if playlist_id.startswith(('RD', 'UL', 'PU')):
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/rg3/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2080
c5e8d7af 2081
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for /channel/<id> URLs.

    Prefers redirecting to the channel's "uploads" playlist (UU...) when
    the channel id can be read from the page, and otherwise extracts the
    videos from the channel's listing pages.
    """

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos listing URL for channel_id (url is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract all videos of a channel.

        Raises ExtractorError (expected) when the listing has no entries
        and the page carries an alert message.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        # A failed (non-fatal) download yields False; no id can be found then.
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            app_url = None
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags, which embed the id.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
            if app_url:
                channel_playlist_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and hand over to the playlist
            # extractor.
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(
            videos_url, channel_id, 'Downloading page #1')

        # Verbose mode: the layout whitespace inside the pattern is ignored.
        if re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page):
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for a first entry; an empty listing may hide an alert.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        channel_entries = self._entries(channel_page, channel_id)
        return self.playlist_result(channel_entries, channel_id)
c5e8d7af
PH
2171
2172
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for /user/<name>, /c/<name>, bare /<name> and ytuser:<name>.

    Reuses the channel extraction of YoutubeChannelIE; only URL matching
    and the construction of the videos listing URL differ.
    """

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if no other Youtube*IE in this module accepts it."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex here is too permissive and would match as well.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos listing URL for the user named in url.

        The path segment is the 'user' group matched by _VALID_URL
        ('user' or 'c'); when that group is absent, 'user' is used.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2223
b05654f0 2224
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's "/live" URL to a video, or else to the channel itself."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the page's video, or for the channel URL.

        The "/live" page is downloaded non-fatally.  When it declares itself
        as an og:type 'video' and exposes a well-formed 11-character
        'videoId' meta value, that video is delegated to YoutubeIE; in every
        other case the URL without the "/live" suffix is returned instead.
        """
        match = re.match(self._VALID_URL, url)
        channel_id = match.group('id')
        fallback = self.url_result(match.group('base_url'))

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return fallback

        page_type = self._og_search_property(
            'type', webpage, 'page type', default=None)
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        if page_type != 'video' or not video_id:
            return fallback
        if not re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return fallback
        return self.url_result(video_id, YoutubeIE.ie_key())
2274
2275
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the "/playlists" tab of a user or channel.

    Only declares the URL pattern and metadata; the extraction logic is
    inherited from the base class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
0c148415
S
2304
2305
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for the ``ytsearch`` keyword (paged YouTube result listing)."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into every results request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested one after another until a page yields no
        video ids or at least *n* entries have been collected; the list is
        then cut down to *n* entries.

        Raises ExtractorError (expected) when a downloaded page contains the
        'search-message' marker.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as the request is satisfied.  Using ">=" (not ">")
            # avoids downloading one more page when exactly n results have
            # been gathered, which was wasted work and could make the whole
            # search fail on that superfluous page.
            if not new_videos or len(videos) >= n:
                break

        # The guard keeps the slice away from n == float('inf').
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2349
c9ae7b95 2350
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that asks for results sorted by upload date."""

    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the query string of every results request by the parent.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2356
c9ae7b95 2357
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for youtube.com/results?... URLs (a single results page)."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return it as a playlist.

        The playlist title is the URL-decoded search query taken from the
        'search_query' / 'q' parameter of *url*.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2379
2380
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<id> pages, handled through their playlists tab."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's "/playlists" URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2398
2399
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and return them as a playlist.

        *url* itself is not fetched; the feed address is built from
        ``_FEED_NAME``.  Further portions are loaded through the "load more"
        link for as long as one is present and the last portion contributed
        at least one id that had not been seen before.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            #
            # This must be a real list: on Python 3 filter() returns an
            # iterator that is always truthy, so the emptiness check below
            # would never fire and the loop would never terminate.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2447
2448
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id 'WL')."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video selected by *url*, else the whole 'WL' playlist.

        Both helpers return a pair; only the second element is used here.
        """
        _, result = self._check_download_just_video(url, 'WL')
        if not result:
            _, result = self._extract_playlist('WL')
        return result
f459d170 2468
5f6a1245 2469
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its backing playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the first ``list=`` id on the favourites page and delegate it.

        *url* is not fetched; the fixed my_favorites address is used.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
2480
2481
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'recommended' feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2487
1ed5b5c9 2488
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'subscriptions' feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2494
1ed5b5c9 2495
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'history' feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2501
2502
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video id and explain why.

    Such URLs typically result from an unquoted "&" on the command line;
    extraction always fails with an explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2550
2551
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v=`` id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)