]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
ChangeLog: update after #12085
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
c5e8d7af 19 compat_parse_qs,
7fd002c0
S
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
c5e8d7af 25 compat_str,
4bb4a188
PH
26)
27from ..utils import (
c5e8d7af 28 clean_html,
9b9c5355 29 error_to_compat_str,
c5e8d7af 30 ExtractorError,
2d30521a 31 float_or_none,
4bb4a188
PH
32 get_element_by_attribute,
33 get_element_by_id,
dd27fd17 34 int_or_none,
94278f72 35 mimetype2ext,
4bb4a188 36 orderedSet,
7c80519c 37 parse_duration,
0cb58b02 38 remove_quotes,
041bc3ad 39 remove_start,
5c2266df 40 sanitized_Request,
cf7e015f 41 smuggle_url,
c93d53f5 42 str_to_int,
556dbe7f 43 try_get,
c5e8d7af
PH
44 unescapeHTML,
45 unified_strdate,
cf7e015f 46 unsmuggle_url,
81c2f20b 47 uppercase_escape,
6e6bc8da 48 urlencode_postdata,
af214c3a 49 ISO3166Utils,
c5e8d7af
PH
50)
51
5f6a1245 52
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints of the Google account sign-in flow used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the YouTube ``PREF`` cookie (``hl=en``) on ``.youtube.com``."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one ``url_result`` (handled by 'Youtube') per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An error is also raised when the password form reports an error message.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure as False, as documented above and like
            # every other failure path of this method (was a bare return).
            return False

        # Start from the hidden inputs of the login page and add credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'checkConnection': 'youtube',
            'Email': username,
            'Passwd': password,
        })

        login_results = self._download_webpage(
            self._PASSWORD_CHALLENGE_URL, None,
            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form))
        if login_results is False:
            return False

        # A non-empty password error element carries the message to show
        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text was captured above
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # The trailing space keeps the two concatenated literals
                # from running together ("<code>(Note ...").
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Being shown the challenge form again means the code was rejected
            if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Still seeing the login form after submitting the credentials
        if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in, if a downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 171
8377574c 172
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of ``page`` and of every following "Load more" page.

        Each chunk of HTML is handed to ``self._process_page`` (not defined
        in this class) and whatever it produces is yielded. Follow-up chunks
        are fetched as JSON from the ``data-uix-load-more-href`` link for as
        long as such a link is found and the returned content is not blank.
        """
        current_html = page
        widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(current_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            next_url = 'https://youtube.com/%s' % load_more.group('more')
            response = self._download_json(
                next_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)

            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
195
061a75ed
S
196
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a ``url_result`` (handled by 'Youtube') for each video found in ``content``."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page(self, page):
        """Collect unique ``(video_id, title)`` pairs from ``page``.

        Matches of ``self._VIDEO_RE`` (not defined in this class) are read
        through their ``id`` and ``title`` groups. Ids keep the order of
        their first occurrence; a later match for an already seen id only
        contributes its title when none has been recorded for that id yet.
        """
        seen_ids = []
        seen_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not the 'index'
            # group, with '0' - possibly not what was intended; confirm
            # against _VIDEO_RE before changing.
            if 'index' in match.groupdict() and video_id == '0':
                continue

            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()

            if video_id not in seen_ids:
                seen_ids.append(video_id)
                seen_titles.append(title)
                continue

            # Known id: fill in a title only if the earlier match had none
            position = seen_ids.index(video_id)
            if title and not seen_titles[position]:
                seen_titles[position] = title
        return zip(seen_ids, seen_titles)
221
222
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' ``url_result`` for each distinct playlist id linked from ``content``."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied to drop repeated ids before yielding
        for playlist_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its entries."""
        list_id = self._match_id(url)
        html = self._download_webpage(url, list_id)
        # The title lookup is non-fatal
        list_title = self._og_search_title(html, fatal=False)
        entries = self._entries(html, list_id)
        return self.playlist_result(entries, list_id, list_title)
0c148415
S
236
237
360e1ca5 238class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 239 IE_DESC = 'YouTube.com'
cb7dfeea 240 _VALID_URL = r"""(?x)^
c5e8d7af 241 (
edb53e2d 242 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 243 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 244 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 245 (?:www\.)?pwnyoutube\.com/|
f7000f3a 246 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
247 tube\.majestyc\.net/|
248 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
249 (?:.*?\#/)? # handle anchor (#/) redirect urls
250 (?: # the various things that can precede the ID:
ac7553d0 251 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 252 |(?: # or the v= param in all its forms
f7000f3a 253 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 254 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 255 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
256 v=
257 )
f4b05232 258 ))
cbaed4bb
S
259 |(?:
260 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
261 vid\.plus| # or vid.plus/xxxx
262 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 263 )/
edb53e2d 264 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 265 )
c5e8d7af 266 )? # all until now is optional -> you can pass the naked ID
8963d9c2 267 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
feaa5ad7 268 (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
269 (?(1).+)? # if we found the ID, everything can follow
270 $"""
c5e8d7af 271 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 272 _formats = {
c2d3cb4c 273 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
274 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
275 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
276 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
277 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
278 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
279 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
280 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 281 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 282 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
283 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
284 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
285 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
286 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
287 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 288 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 289 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
290 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 291
292
293 # 3D videos
c2d3cb4c 294 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
295 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
296 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
297 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 298 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
299 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
300 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 301
96fb5605 302 # Apple HTTP Live Streaming
11f12195 303 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 304 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
305 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
306 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
307 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
308 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 309 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
310 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
311
312 # DASH mp4 video
c2d3cb4c 313 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
314 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
315 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
316 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
317 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
318 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
319 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
8409b368 320 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
a6c2c244
YCH
321 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
322 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
323 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
324 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
836a086c 325
f6f1fc92 326 # Dash mp4 audio
c2d3cb4c 327 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
328 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
329 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
2c347352
S
330 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
331 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
605fd639
RA
332 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
333 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
334
335 # Dash webm
a6c2c244
YCH
336 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
337 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
338 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
339 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
340 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
341 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
342 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
343 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
344 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
345 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
346 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
347 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
348 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
349 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
350 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
4c6b4764 351 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
a6c2c244
YCH
352 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
353 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
354 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
355 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
356 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
357 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
2c62dc26
PH
358
359 # Dash webm audio
a6c2c244
YCH
360 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
361 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 362
0857baad 363 # Dash webm audio with opus inside
a6c2c244
YCH
364 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
365 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
366 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
0857baad 367
ce6b9a2d
PH
368 # RTMP (unnamed)
369 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 370 }
23d17e4b 371 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 372
78caa52a 373 IE_NAME = 'youtube'
2eb88d95
PH
374 _TESTS = [
375 {
2d3d2997 376 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
377 'info_dict': {
378 'id': 'BaW_jenozKc',
379 'ext': 'mp4',
380 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
381 'uploader': 'Philipp Hagemeister',
382 'uploader_id': 'phihag',
ec85ded8 383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 384 'upload_date': '20121002',
7caf9830 385 'license': 'Standard YouTube License',
4bc3a23e
PH
386 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
387 'categories': ['Science & Technology'],
000b6b5a 388 'tags': ['youtube-dl'],
556dbe7f 389 'duration': 10,
3e7c1224
PH
390 'like_count': int,
391 'dislike_count': int,
7c80519c 392 'start_time': 1,
297a564b 393 'end_time': 9,
2eb88d95 394 }
0e853ca4 395 },
0e853ca4 396 {
2d3d2997 397 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
398 'note': 'Test generic use_cipher_signature video (#897)',
399 'info_dict': {
400 'id': 'UxxajLWwzqY',
401 'ext': 'mp4',
402 'upload_date': '20120506',
403 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 404 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 405 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
406 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
407 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
408 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 409 'duration': 180,
4bc3a23e
PH
410 'uploader': 'Icona Pop',
411 'uploader_id': 'IconaPop',
ec85ded8 412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 413 'license': 'Standard YouTube License',
0cb58b02 414 'creator': 'Icona Pop',
2eb88d95 415 }
c108eb73
JMF
416 },
417 {
4bc3a23e
PH
418 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
419 'note': 'Test VEVO video with age protection (#956)',
420 'info_dict': {
421 'id': '07FYdnEawAQ',
422 'ext': 'mp4',
423 'upload_date': '20130703',
424 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 425 'alt_title': 'Tunnel Vision',
4bc3a23e 426 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 427 'duration': 419,
4bc3a23e
PH
428 'uploader': 'justintimberlakeVEVO',
429 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 431 'license': 'Standard YouTube License',
0cb58b02 432 'creator': 'Justin Timberlake',
34952f09 433 'age_limit': 18,
c108eb73
JMF
434 }
435 },
fccd3771 436 {
4bc3a23e
PH
437 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
438 'note': 'Embed-only video (#1746)',
439 'info_dict': {
440 'id': 'yZIXLfi8CZQ',
441 'ext': 'mp4',
442 'upload_date': '20120608',
443 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
444 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
445 'uploader': 'SET India',
94bfcd23 446 'uploader_id': 'setindia',
ec85ded8 447 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 448 'license': 'Standard YouTube License',
94bfcd23 449 'age_limit': 18,
fccd3771
PH
450 }
451 },
11b56058 452 {
2d3d2997 453 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
454 'note': 'Use the first video ID in the URL',
455 'info_dict': {
456 'id': 'BaW_jenozKc',
457 'ext': 'mp4',
458 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
459 'uploader': 'Philipp Hagemeister',
460 'uploader_id': 'phihag',
ec85ded8 461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 462 'upload_date': '20121002',
7caf9830 463 'license': 'Standard YouTube License',
11b56058
PM
464 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
465 'categories': ['Science & Technology'],
466 'tags': ['youtube-dl'],
556dbe7f 467 'duration': 10,
11b56058
PM
468 'like_count': int,
469 'dislike_count': int,
34a7de29
S
470 },
471 'params': {
472 'skip_download': True,
473 },
11b56058 474 },
dd27fd17 475 {
2d3d2997 476 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
477 'note': '256k DASH audio (format 141) via DASH manifest',
478 'info_dict': {
479 'id': 'a9LDPn-MO4I',
480 'ext': 'm4a',
481 'upload_date': '20121002',
482 'uploader_id': '8KVIDEO',
ec85ded8 483 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
484 'description': '',
485 'uploader': '8KVIDEO',
7caf9830 486 'license': 'Standard YouTube License',
4bc3a23e 487 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 488 },
4bc3a23e
PH
489 'params': {
490 'youtube_include_dash_manifest': True,
491 'format': '141',
4919603f 492 },
de3c7fe0 493 'skip': 'format 141 not served anymore',
dd27fd17 494 },
3489b7d2
JMF
495 # DASH manifest with encrypted signature
496 {
78caa52a
PH
497 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
498 'info_dict': {
499 'id': 'IB3lcPjvWLA',
500 'ext': 'm4a',
b766eb27
JMF
501 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
502 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
556dbe7f 503 'duration': 244,
78caa52a
PH
504 'uploader': 'AfrojackVEVO',
505 'uploader_id': 'AfrojackVEVO',
506 'upload_date': '20131011',
7caf9830 507 'license': 'Standard YouTube License',
3489b7d2 508 },
4bc3a23e 509 'params': {
78caa52a 510 'youtube_include_dash_manifest': True,
de3c7fe0 511 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
512 },
513 },
aaeb86f6
S
514 # JS player signature function name containing $
515 {
516 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
517 'info_dict': {
518 'id': 'nfWlot6h_JM',
519 'ext': 'm4a',
520 'title': 'Taylor Swift - Shake It Off',
0cb58b02 521 'alt_title': 'Shake It Off',
f57b7835 522 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 523 'duration': 242,
aaeb86f6
S
524 'uploader': 'TaylorSwiftVEVO',
525 'uploader_id': 'TaylorSwiftVEVO',
526 'upload_date': '20140818',
7caf9830 527 'license': 'Standard YouTube License',
0cb58b02 528 'creator': 'Taylor Swift',
aaeb86f6
S
529 },
530 'params': {
531 'youtube_include_dash_manifest': True,
de3c7fe0 532 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
533 },
534 },
aa79ac0c
PH
535 # Controversy video
536 {
537 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
538 'info_dict': {
539 'id': 'T4XJQO3qol8',
540 'ext': 'mp4',
556dbe7f 541 'duration': 219,
aa79ac0c
PH
542 'upload_date': '20100909',
543 'uploader': 'The Amazing Atheist',
544 'uploader_id': 'TheAmazingAtheist',
ec85ded8 545 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 546 'license': 'Standard YouTube License',
aa79ac0c
PH
547 'title': 'Burning Everyone\'s Koran',
548 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
549 }
c522adb1
JMF
550 },
551 # Normal age-gate video (No vevo, embed allowed)
552 {
2d3d2997 553 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
554 'info_dict': {
555 'id': 'HtVdAasjOgU',
556 'ext': 'mp4',
557 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 558 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 559 'duration': 142,
c522adb1
JMF
560 'uploader': 'The Witcher',
561 'uploader_id': 'WitcherGame',
ec85ded8 562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 563 'upload_date': '20140605',
7caf9830 564 'license': 'Standard YouTube License',
34952f09 565 'age_limit': 18,
c522adb1
JMF
566 },
567 },
fccae2b9
S
568 # Age-gate video with encrypted signature
569 {
2d3d2997 570 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
571 'info_dict': {
572 'id': '6kLq3WMV1nU',
573 'ext': 'mp4',
574 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
575 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
556dbe7f 576 'duration': 247,
fccae2b9
S
577 'uploader': 'LloydVEVO',
578 'uploader_id': 'LloydVEVO',
ec85ded8 579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 580 'upload_date': '20110629',
7caf9830 581 'license': 'Standard YouTube License',
34952f09 582 'age_limit': 18,
fccae2b9
S
583 },
584 },
774e208f
PH
585 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
586 {
587 'url': '__2ABJjxzNo',
588 'info_dict': {
589 'id': '__2ABJjxzNo',
590 'ext': 'mp4',
556dbe7f 591 'duration': 266,
774e208f
PH
592 'upload_date': '20100430',
593 'uploader_id': 'deadmau5',
ec85ded8 594 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 595 'creator': 'deadmau5',
774e208f
PH
596 'description': 'md5:12c56784b8032162bb936a5f76d55360',
597 'uploader': 'deadmau5',
7caf9830 598 'license': 'Standard YouTube License',
774e208f 599 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 600 'alt_title': 'Some Chords',
774e208f
PH
601 },
602 'expected_warnings': [
603 'DASH manifest missing',
604 ]
e52a40ab
PH
605 },
606 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
607 {
608 'url': 'lqQg6PlCWgI',
609 'info_dict': {
610 'id': 'lqQg6PlCWgI',
611 'ext': 'mp4',
556dbe7f 612 'duration': 6085,
90227264 613 'upload_date': '20150827',
cbe2bd91 614 'uploader_id': 'olympic',
ec85ded8 615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 616 'license': 'Standard YouTube License',
cbe2bd91 617 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 618 'uploader': 'Olympic',
cbe2bd91
PH
619 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
620 },
621 'params': {
622 'skip_download': 'requires avconv',
e52a40ab 623 }
cbe2bd91 624 },
6271f1ca
PH
625 # Non-square pixels
626 {
627 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
628 'info_dict': {
629 'id': '_b-2C3KPAM0',
630 'ext': 'mp4',
631 'stretched_ratio': 16 / 9.,
556dbe7f 632 'duration': 85,
6271f1ca
PH
633 'upload_date': '20110310',
634 'uploader_id': 'AllenMeow',
ec85ded8 635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
636 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
637 'uploader': '孫艾倫',
7caf9830 638 'license': 'Standard YouTube License',
6271f1ca
PH
639 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
640 },
06b491eb
S
641 },
642 # url_encoded_fmt_stream_map is empty string
643 {
644 'url': 'qEJwOuvDf7I',
645 'info_dict': {
646 'id': 'qEJwOuvDf7I',
f57b7835 647 'ext': 'webm',
06b491eb
S
648 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
649 'description': '',
650 'upload_date': '20150404',
651 'uploader_id': 'spbelect',
652 'uploader': 'Наблюдатели Петербурга',
653 },
654 'params': {
655 'skip_download': 'requires avconv',
e323cf3f
S
656 },
657 'skip': 'This live event has ended.',
06b491eb 658 },
da77d856
S
659 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
660 {
661 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
662 'info_dict': {
663 'id': 'FIl7x6_3R5Y',
664 'ext': 'mp4',
665 'title': 'md5:7b81415841e02ecd4313668cde88737a',
666 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 667 'duration': 220,
da77d856
S
668 'upload_date': '20150625',
669 'uploader_id': 'dorappi2000',
ec85ded8 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 671 'uploader': 'dorappi2000',
7caf9830 672 'license': 'Standard YouTube License',
be49068d 673 'formats': 'mincount:32',
da77d856 674 },
2ee8f5d8 675 },
8a1a26ce
YCH
676 # DASH manifest with segment_list
677 {
678 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
679 'md5': '8ce563a1d667b599d21064e982ab9e31',
680 'info_dict': {
681 'id': 'CsmdDsKjzN8',
682 'ext': 'mp4',
17ee98e1 683 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
684 'uploader': 'Airtek',
685 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
686 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 687 'license': 'Standard YouTube License',
8a1a26ce
YCH
688 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
689 },
690 'params': {
691 'youtube_include_dash_manifest': True,
692 'format': '135', # bestvideo
be49068d
S
693 },
694 'skip': 'This live event has ended.',
2ee8f5d8 695 },
cf7e015f
S
696 {
697 # Multifeed videos (multiple cameras), URL is for Main Camera
698 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
699 'info_dict': {
700 'id': 'jqWvoWXjCVs',
701 'title': 'teamPGP: Rocket League Noob Stream',
702 'description': 'md5:dc7872fb300e143831327f1bae3af010',
703 },
704 'playlist': [{
705 'info_dict': {
706 'id': 'jqWvoWXjCVs',
707 'ext': 'mp4',
708 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
709 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 710 'duration': 7335,
cf7e015f
S
711 'upload_date': '20150721',
712 'uploader': 'Beer Games Beer',
713 'uploader_id': 'beergamesbeer',
ec85ded8 714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 715 'license': 'Standard YouTube License',
cf7e015f
S
716 },
717 }, {
718 'info_dict': {
719 'id': '6h8e8xoXJzg',
720 'ext': 'mp4',
721 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
722 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 723 'duration': 7337,
cf7e015f
S
724 'upload_date': '20150721',
725 'uploader': 'Beer Games Beer',
726 'uploader_id': 'beergamesbeer',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 728 'license': 'Standard YouTube License',
cf7e015f
S
729 },
730 }, {
731 'info_dict': {
732 'id': 'PUOgX5z9xZw',
733 'ext': 'mp4',
734 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
735 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 736 'duration': 7337,
cf7e015f
S
737 'upload_date': '20150721',
738 'uploader': 'Beer Games Beer',
739 'uploader_id': 'beergamesbeer',
ec85ded8 740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 741 'license': 'Standard YouTube License',
cf7e015f
S
742 },
743 }, {
744 'info_dict': {
745 'id': 'teuwxikvS5k',
746 'ext': 'mp4',
747 'title': 'teamPGP: Rocket League Noob Stream (zim)',
748 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 749 'duration': 7334,
cf7e015f
S
750 'upload_date': '20150721',
751 'uploader': 'Beer Games Beer',
752 'uploader_id': 'beergamesbeer',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 754 'license': 'Standard YouTube License',
cf7e015f
S
755 },
756 }],
757 'params': {
758 'skip_download': True,
759 },
cbaed4bb 760 },
f9f49d87
S
761 {
762 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
763 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
764 'info_dict': {
765 'id': 'gVfLd0zydlo',
766 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
767 },
768 'playlist_count': 2,
be49068d 769 'skip': 'Not multifeed anymore',
f9f49d87 770 },
cbaed4bb 771 {
2d3d2997 772 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 773 'only_matching': True,
0e49d9a6 774 },
6d4fc66b 775 {
2d3d2997 776 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
777 'only_matching': True,
778 },
0e49d9a6 779 {
61f92af1 780 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
781 # Also tests cut-off URL expansion in video description (see
782 # https://github.com/rg3/youtube-dl/issues/1892,
783 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
784 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
785 'info_dict': {
786 'id': 'lsguqyKfVQg',
787 'ext': 'mp4',
788 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 789 'alt_title': 'Dark Walk',
0e49d9a6 790 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 791 'duration': 133,
0e49d9a6
LL
792 'upload_date': '20151119',
793 'uploader_id': 'IronSoulElf',
ec85ded8 794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 795 'uploader': 'IronSoulElf',
7caf9830 796 'license': 'Standard YouTube License',
0cb58b02 797 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
798 },
799 'params': {
800 'skip_download': True,
801 },
802 },
61f92af1
S
803 {
804 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
805 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
806 'only_matching': True,
807 },
313dfc45
LL
808 {
809 # Video with yt:stretch=17:0
810 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
811 'info_dict': {
812 'id': 'Q39EVAstoRM',
813 'ext': 'mp4',
814 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
815 'description': 'md5:ee18a25c350637c8faff806845bddee9',
816 'upload_date': '20151107',
817 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
818 'uploader': 'CH GAMER DROID',
819 },
820 'params': {
821 'skip_download': True,
822 },
be49068d 823 'skip': 'This video does not exist.',
313dfc45 824 },
7caf9830
S
825 {
826 # Video licensed under Creative Commons
827 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
828 'info_dict': {
829 'id': 'M4gD1WSo5mA',
830 'ext': 'mp4',
831 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
832 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 833 'duration': 721,
7caf9830
S
834 'upload_date': '20150127',
835 'uploader_id': 'BerkmanCenter',
ec85ded8 836 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 837 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
838 'license': 'Creative Commons Attribution license (reuse allowed)',
839 },
840 'params': {
841 'skip_download': True,
842 },
843 },
fd050249
S
844 {
845 # Channel-like uploader_url
846 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
847 'info_dict': {
848 'id': 'eQcmzGIKrzg',
849 'ext': 'mp4',
850 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
851 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 852 'duration': 4060,
fd050249
S
853 'upload_date': '20151119',
854 'uploader': 'Bernie 2016',
855 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 856 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
857 'license': 'Creative Commons Attribution license (reuse allowed)',
858 },
859 'params': {
860 'skip_download': True,
861 },
862 },
040ac686
S
863 {
864 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
865 'only_matching': True,
7f29cf54
S
866 },
867 {
868 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
869 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
870 'only_matching': True,
6496ccb4
S
871 },
872 {
873 # Rental video preview
874 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
875 'info_dict': {
876 'id': 'uGpuVWrhIzE',
877 'ext': 'mp4',
878 'title': 'Piku - Trailer',
879 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
880 'upload_date': '20150811',
881 'uploader': 'FlixMatrix',
882 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 883 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
884 'license': 'Standard YouTube License',
885 },
886 'params': {
887 'skip_download': True,
888 },
022a5d66 889 },
12afdc2a
S
890 {
891 # YouTube Red video with episode data
892 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
893 'info_dict': {
894 'id': 'iqKdEhx-dD4',
895 'ext': 'mp4',
896 'title': 'Isolation - Mind Field (Ep 1)',
556dbe7f
S
897 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
898 'duration': 2085,
12afdc2a
S
899 'upload_date': '20170118',
900 'uploader': 'Vsauce',
901 'uploader_id': 'Vsauce',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
903 'license': 'Standard YouTube License',
904 'series': 'Mind Field',
905 'season_number': 1,
906 'episode_number': 1,
907 },
908 'params': {
909 'skip_download': True,
910 },
911 'expected_warnings': [
912 'Skipping DASH manifest',
913 ],
914 },
022a5d66
S
915 {
916 # itag 212
917 'url': '1t24XAntNCY',
918 'only_matching': True,
040ac686 919 }
2eb88d95
PH
920 ]
921
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature with the signature functions it has
    # already extracted, so that each one is only built once per instance.
    self._player_cache = dict()
e0df6211 925
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 929
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
933
def report_unavailable_format(self, video_id, format):
    """Print a notice that the given format is not available for the video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
937
def report_rtmp_download(self):
    """Print a notice that the download will go over RTMP."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 941
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the shape of a signature as dot-joined part lengths.

    The signature is split on '.', and the length of every part is joined
    back together with '.', e.g. 'abcd.ef' becomes '4.2'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
945
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures for the given player.

    The player id and type (file extension) are parsed out of player_url.
    If the downloader cache already holds an index list for this player
    and signature shape, a function applying that index list is returned
    without any download. Otherwise the player is downloaded, parsed with
    _parse_sig_js or _parse_sig_swf, and the resulting function is probed
    with a string of distinct characters to derive the index list, which
    is then stored in the cache.

    Raises ExtractorError if the player URL cannot be parsed, if the
    derived cache id is not a plain file name, or if the player type is
    neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id is used as a cache entry name, so it must not contain any
    # directory component. Checked explicitly rather than with ``assert``
    # because assertions are removed when Python runs with -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raised explicitly: with ``assert False`` and -O execution would
        # fall through and fail later on the unbound name ``res``.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Feed the function a string whose i-th character has code point i;
    # the code points of the output then spell out the index permutation.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
991
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is probed with a string of distinct characters as long as
    ``example_sig``; the resulting index permutation is rendered as a sum
    of index and slice expressions on ``s`` and written with to_screen,
    guarded by a check on the lengths of the signature's '.'-separated
    parts.
    """
    def gen_sig_code(idxs):
        """Yield 's[...]' expressions that together reproduce ``idxs``."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to index; the caller substitutes an empty string.
            return

        step = None
        # start is only meaningful while step is set
        start = None
        # Pre-bind i so that a single-element permutation (for which the
        # loop below never runs) is still emitted instead of failing on an
        # unbound name.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run of consecutive indices ended at prev
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of an ascending or descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec)) or "''"
    part_lengths = [compat_str(len(p)) for p in example_sig.split('.')]
    if len(part_lengths) == 1:
        # '(85)' is an int, not a tuple, so the generated comparison could
        # never be true; a one-element tuple needs the trailing comma.
        signature_id_tuple = '(%s,)' % part_lengths[0]
    else:
        signature_id_tuple = '(%s)' % ', '.join(part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1030
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The name of the signature function is located with a regex search on
    ``jscode``, the function is extracted with JSInterpreter, and a
    one-argument wrapper around it is returned.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def descramble(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return descramble
1040
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped in a one-argument callable.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def descramble(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return descramble
1047
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted ``s`` field into a working signature.

    The player URL is made absolute, the matching signature function is
    fetched from the per-instance cache (extracting it first if needed)
    and applied to ``s``. Any failure along the way is re-raised as an
    ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute: protocol-relative URLs get https,
    # everything else without a scheme is resolved against youtube.com.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        # One function per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        descramble = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(descramble, s)
        return descramble(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1074
def _get_subtitles(self, video_id, webpage):
    """Return the subtitles of a video as {language: [format dicts]}.

    The track list is downloaded as XML; for the first track of every
    language one entry per format in _SUBTITLE_FORMATS is produced, each
    with a 'url' and an 'ext' key. An empty dict is returned (after a
    warning) when the list cannot be downloaded or contains no tracks.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        # Only the first track listed for a language is used
        if lang in subtitles:
            continue
        formats = []
        for ext in self._SUBTITLE_FORMATS:
            query = compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            })
            formats.append({
                'url': 'https://www.youtube.com/api/timedtext?' + query,
                'ext': ext,
            })
        subtitles[lang] = formats

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1106
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config JSON object from a webpage.

    Returns the parsed object, or None when no config is found in the
    page. The JSON is parsed non-fatally.
    """
    # User-supplied data inside the config may contain arbitrary character
    # sequences that confuse regex-based JSON extraction: if it contains
    # '};' the looser second pattern stops too early. As a workaround the
    # more specific pattern is tried first, until proper quoted-string
    # handling replaces it (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1124
def _get_automatic_captions(self, video_id, webpage):
    """Return automatic captions as {language: [format dicts]}.

    The webpage is passed in because the caption URLs are read from the
    player config embedded in it, which saves a download. Two layouts of
    the config 'args' are handled: a 'ttsurl' that is queried for the list
    of target languages, or 'caption_tracks' together with
    'caption_translation_languages'. An empty dict is returned (after a
    warning) when no captions can be found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # Ask for the list of available subtitles
            list_query = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            caption_list = self._download_xml(
                tts_url + '&' + list_query, video_id)
            source_track = caption_list.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = source_track.attrib['lang_code']
            caption_kind = source_track.attrib.get('kind', '')

            captions = {}
            for target in caption_list.findall('target'):
                target_lang = target.attrib['lang_code']
                formats = []
                for ext in self._SUBTITLE_FORMATS:
                    query = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    formats.append({
                        'url': tts_url + '&' + query,
                        'ext': ext,
                    })
                captions[target_lang] = formats
            return captions

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        translation_languages = args['caption_translation_languages']
        first_track = compat_parse_qs(caption_tracks.split(',')[0])
        base_url = compat_urllib_parse_urlparse(first_track['u'][0])
        caption_qs = compat_parse_qs(base_url.query)

        captions = {}
        for language in translation_languages.split(','):
            language_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(language))
            target_lang = language_qs.get('lc', [None])[0]
            if not target_lang:
                continue
            formats = []
            for ext in self._SUBTITLE_FORMATS:
                # Reuse the first track's query, overriding the target
                # language and the format
                caption_qs.update({
                    'tlang': [target_lang],
                    'fmt': [ext],
                })
                new_query = compat_urllib_parse_urlencode(caption_qs, True)
                formats.append({
                    'url': compat_urlparse.urlunparse(
                        base_url._replace(query=new_query)),
                    'ext': ext,
                })
            captions[target_lang] = formats
        return captions
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1206
d77ab8e2
S
1207 def _mark_watched(self, video_id, video_info):
1208 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1209 if not playback_url:
1210 return
1211 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1212 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1213
1214 # cpn generation algorithm is reverse engineered from base.js.
1215 # In fact it works even with dummy cpn.
1216 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1217 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1218
1219 qs.update({
1220 'ver': ['2'],
1221 'cpn': [cpn],
1222 })
1223 playback_url = compat_urlparse.urlunparse(
15707c7e 1224 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1225
1226 self._download_webpage(
1227 playback_url, video_id, 'Marking watched',
1228 'Unable to mark watched', fatal=False)
1229
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id found in ``url``.

    The URL is matched against the class's _VALID_URL in verbose mode and
    the second group of the match is returned. Raises ExtractorError when
    the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1237
1d043b93
JMF
1238 def _extract_from_m3u8(self, manifest_url, video_id):
1239 url_map = {}
5f6a1245 1240
1d043b93
JMF
1241 def _get_urls(_manifest):
1242 lines = _manifest.split('\n')
1243 urls = filter(lambda l: l and not l.startswith('#'),
8bcc8756 1244 lines)
1d043b93 1245 return urls
78caa52a 1246 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1d043b93
JMF
1247 formats_urls = _get_urls(manifest)
1248 for format_url in formats_urls:
890f62e8 1249 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
1250 url_map[itag] = format_url
1251 return url_map
1252
1fb07d10
JG
1253 def _extract_annotations(self, video_id):
1254 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 1255 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 1256
c5e8d7af 1257 def _real_extract(self, url):
cf7e015f
S
1258 url, smuggled_data = unsmuggle_url(url, {})
1259
7e8c0af0 1260 proto = (
78caa52a
PH
1261 'http' if self._downloader.params.get('prefer_insecure', False)
1262 else 'https')
7e8c0af0 1263
7c80519c 1264 start_time = None
297a564b 1265 end_time = None
7c80519c
JMF
1266 parsed_url = compat_urllib_parse_urlparse(url)
1267 for component in [parsed_url.fragment, parsed_url.query]:
1268 query = compat_parse_qs(component)
297a564b 1269 if start_time is None and 't' in query:
7c80519c 1270 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1271 if start_time is None and 'start' in query:
1272 start_time = parse_duration(query['start'][0])
297a564b
JMF
1273 if end_time is None and 'end' in query:
1274 end_time = parse_duration(query['end'][0])
7c80519c 1275
c5e8d7af
PH
1276 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1277 mobj = re.search(self._NEXT_URL_RE, url)
1278 if mobj:
7fd002c0 1279 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1280 video_id = self.extract_id(url)
c5e8d7af
PH
1281
1282 # Get video webpage
aa79ac0c 1283 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1284 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1285
1286 # Attempt to extract SWF player URL
e0df6211 1287 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1288 if mobj is not None:
1289 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1290 else:
1291 player_url = None
1292
d8d24a92
S
1293 dash_mpds = []
1294
1295 def add_dash_mpd(video_info):
1296 dash_mpd = video_info.get('dashmpd')
1297 if dash_mpd and dash_mpd[0] not in dash_mpds:
1298 dash_mpds.append(dash_mpd[0])
1299
c5e8d7af 1300 # Get video info
6449cd80 1301 embed_webpage = None
2fe1ff85 1302 is_live = None
c108eb73 1303 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1304 age_gate = True
1305 # We simulate the access to the video from www.youtube.com/v/{video_id}
1306 # this can be viewed without login into Youtube
beb95e77
CL
1307 url = proto + '://www.youtube.com/embed/%s' % video_id
1308 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1309 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1310 'video_id': video_id,
1311 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1312 'sts': self._search_regex(
beb95e77 1313 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1314 })
7e8c0af0 1315 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1316 video_info_webpage = self._download_webpage(
1317 video_info_url, video_id,
20436c30 1318 note='Refetching age-gated info webpage',
94bd3613 1319 errnote='unable to download video info webpage')
c5e8d7af 1320 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1321 add_dash_mpd(video_info)
c108eb73
JMF
1322 else:
1323 age_gate = False
bc93bdb5 1324 video_info = None
d8d24a92 1325 # Try looking directly into the video webpage
a72778d3
S
1326 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1327 if ytplayer_config:
4e62ebe2 1328 args = ytplayer_config['args']
d8d24a92
S
1329 if args.get('url_encoded_fmt_stream_map'):
1330 # Convert to the same format returned by compat_parse_qs
1331 video_info = dict((k, [v]) for k, v in args.items())
1332 add_dash_mpd(video_info)
6496ccb4
S
1333 # Rental video is not rented but preview is available (e.g.
1334 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1335 # https://github.com/rg3/youtube-dl/issues/10532)
1336 if not video_info and args.get('ypc_vid'):
1337 return self.url_result(
1338 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1339 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1340 is_live = True
0a3cf9ad
S
1341 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1342 # We also try looking in get_video_info since it may contain different dashmpd
1343 # URL that points to a DASH manifest with possibly different itag set (some itags
1344 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1345 # manifest pointed by get_video_info's dashmpd).
1346 # The general idea is to take a union of itags of both DASH manifests (for example
1347 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1348 self.report_video_info_webpage_download(video_id)
0a3cf9ad 1349 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
810fb84d
PH
1350 video_info_url = (
1351 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1352 % (proto, video_id, el_type))
1353 video_info_webpage = self._download_webpage(
1354 video_info_url,
4e62ebe2
JMF
1355 video_id, note=False,
1356 errnote='unable to download video info webpage')
0a3cf9ad 1357 get_video_info = compat_parse_qs(video_info_webpage)
87dc4511
JMF
1358 if get_video_info.get('use_cipher_signature') != ['True']:
1359 add_dash_mpd(get_video_info)
0a3cf9ad
S
1360 if not video_info:
1361 video_info = get_video_info
1362 if 'token' in get_video_info:
89ea063e
S
1363 # Different get_video_info requests may report different results, e.g.
1364 # some may report video unavailability, but some may serve it without
1365 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1366 # the original webpage as well as el=info and el=embedded get_video_info
1367 # requests report video unavailability due to geo restriction while
1368 # el=detailpage succeeds and returns valid data). This is probably
1369 # due to YouTube measures against IP ranges of hosting providers.
1370 # Working around by preferring the first succeeded video_info containing
1371 # the token if no such video_info yet was found.
44b2264f
S
1372 if 'token' not in video_info:
1373 video_info = get_video_info
4e62ebe2 1374 break
c5e8d7af
PH
1375 if 'token' not in video_info:
1376 if 'reason' in video_info:
af214c3a
YCH
1377 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1378 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
678e436f 1379 if regions_allowed:
af214c3a
YCH
1380 raise ExtractorError('YouTube said: This video is available in %s only' % (
1381 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1382 expected=True)
d11271dd 1383 raise ExtractorError(
78caa52a 1384 'YouTube said: %s' % video_info['reason'][0],
d11271dd 1385 expected=True, video_id=video_id)
c5e8d7af 1386 else:
d11271dd 1387 raise ExtractorError(
78caa52a 1388 '"token" parameter not in video info for unknown reason',
d11271dd 1389 video_id=video_id)
c5e8d7af 1390
cf7e015f
S
1391 # title
1392 if 'title' in video_info:
1393 video_title = video_info['title'][0]
1394 else:
1395 self._downloader.report_warning('Unable to extract video title')
1396 video_title = '_'
1397
1398 # description
1399 video_description = get_element_by_id("eow-description", video_webpage)
1400 if video_description:
1401 video_description = re.sub(r'''(?x)
1402 <a\s+
25cb7a0e 1403 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1404 (?:title|href)="([^"]+)"\s+
25cb7a0e 1405 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1406 class="[^"]*"[^>]*>
23f13e97 1407 [^<]+\.{3}\s*
cf7e015f
S
1408 </a>
1409 ''', r'\1', video_description)
1410 video_description = clean_html(video_description)
1411 else:
1412 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1413 if fd_mobj:
1414 video_description = unescapeHTML(fd_mobj.group(1))
1415 else:
1416 video_description = ''
1417
5e1eddb9
S
1418 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1419 if not self._downloader.params.get('noplaylist'):
1420 entries = []
1421 feed_ids = []
6863631c 1422 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1423 for feed in multifeed_metadata_list.split(','):
6863631c
S
1424 # Unquote should take place before split on comma (,) since textual
1425 # fields may contain comma as well (see
1426 # https://github.com/rg3/youtube-dl/issues/8536)
1427 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1428 entries.append({
1429 '_type': 'url_transparent',
1430 'ie_key': 'Youtube',
1431 'url': smuggle_url(
1432 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1433 {'force_singlefeed': True}),
1434 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1435 })
1436 feed_ids.append(feed_data['id'][0])
1437 self.to_screen(
1438 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1439 % (', '.join(feed_ids), video_id))
1440 return self.playlist_result(entries, video_id, video_title, video_description)
1441 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1442
1d699755
PH
1443 if 'view_count' in video_info:
1444 view_count = int(video_info['view_count'][0])
1445 else:
1446 view_count = None
1447
c5e8d7af
PH
1448 # Check for "rental" videos
1449 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
69ea8ca4 1450 raise ExtractorError('"rental" videos not supported')
c5e8d7af
PH
1451
1452 # Start extracting information
1453 self.report_information_extraction(video_id)
1454
1455 # uploader
1456 if 'author' not in video_info:
69ea8ca4 1457 raise ExtractorError('Unable to extract uploader name')
7fd002c0 1458 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
c5e8d7af
PH
1459
1460 # uploader_id
1461 video_uploader_id = None
fd050249
S
1462 video_uploader_url = None
1463 mobj = re.search(
1464 r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1465 video_webpage)
c5e8d7af 1466 if mobj is not None:
fd050249
S
1467 video_uploader_id = mobj.group('uploader_id')
1468 video_uploader_url = mobj.group('uploader_url')
c5e8d7af 1469 else:
69ea8ca4 1470 self._downloader.report_warning('unable to extract uploader nickname')
c5e8d7af 1471
c5e8d7af 1472 # thumbnail image
7763b04e
JMF
1473 # We try first to get a high quality image:
1474 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1475 video_webpage, re.DOTALL)
1476 if m_thumb is not None:
1477 video_thumbnail = m_thumb.group(1)
1478 elif 'thumbnail_url' not in video_info:
69ea8ca4 1479 self._downloader.report_warning('unable to extract video thumbnail')
f490e77e 1480 video_thumbnail = None
c5e8d7af 1481 else: # don't panic if we can't find it
7fd002c0 1482 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
c5e8d7af
PH
1483
1484 # upload date
9d0b581f
S
1485 upload_date = self._html_search_meta(
1486 'datePublished', video_webpage, 'upload date', default=None)
1487 if not upload_date:
1488 upload_date = self._search_regex(
1489 [r'(?s)id="eow-date.*?>(.*?)</span>',
1490 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1491 video_webpage, 'upload date', default=None)
1492 if upload_date:
1493 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1494 upload_date = unified_strdate(upload_date)
c5e8d7af 1495
7caf9830
S
1496 video_license = self._html_search_regex(
1497 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1498 video_webpage, 'license', default=None)
1499
0cb58b02
S
1500 m_music = re.search(
1501 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1502 video_webpage)
1503 if m_music:
1504 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1505 video_creator = clean_html(m_music.group('creator'))
1506 else:
1507 video_alt_title = video_creator = None
1508
12afdc2a
S
1509 m_episode = re.search(
1510 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1511 video_webpage)
1512 if m_episode:
1513 series = m_episode.group('series')
1514 season_number = int(m_episode.group('season'))
1515 episode_number = int(m_episode.group('episode'))
1516 else:
1517 series = season_number = episode_number = None
1518
55f7bd2d
PH
1519 m_cat_container = self._search_regex(
1520 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
624dcebf 1521 video_webpage, 'categories', default=None)
ec8deefc 1522 if m_cat_container:
ad3bc6ac 1523 category = self._html_search_regex(
01ed5c9b 1524 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
1525 default=None)
1526 video_categories = None if category is None else [category]
1527 else:
1528 video_categories = None
ec8deefc 1529
000b6b5a
S
1530 video_tags = [
1531 unescapeHTML(m.group('content'))
1532 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1533
f30a38be 1534 def _extract_count(count_name):
c93d53f5
S
1535 return str_to_int(self._search_regex(
1536 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1537 % re.escape(count_name),
1538 video_webpage, count_name, default=None))
1539
69ea8ca4
PH
1540 like_count = _extract_count('like')
1541 dislike_count = _extract_count('dislike')
336c3a69 1542
c5e8d7af 1543 # subtitles
d82134c3 1544 video_subtitles = self.extract_subtitles(video_id, video_webpage)
360e1ca5 1545 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
c5e8d7af 1546
556dbe7f
S
1547 video_duration = try_get(
1548 video_info, lambda x: int_or_none(x['length_seconds'][0]))
1549 if not video_duration:
1550 video_duration = parse_duration(self._html_search_meta(
1551 'duration', video_webpage, 'video duration'))
c5e8d7af 1552
1fb07d10
JG
1553 # annotations
1554 video_annotations = None
1555 if self._downloader.params.get('writeannotations', False):
5f6a1245 1556 video_annotations = self._extract_annotations(video_id)
1fb07d10 1557
dd27fd17
PH
1558 def _map_to_format_list(urlmap):
1559 formats = []
1560 for itag, video_real_url in urlmap.items():
1561 dct = {
1562 'format_id': itag,
1563 'url': video_real_url,
1564 'player_url': player_url,
1565 }
0b65e5d4
PH
1566 if itag in self._formats:
1567 dct.update(self._formats[itag])
dd27fd17
PH
1568 formats.append(dct)
1569 return formats
1570
c5e8d7af
PH
1571 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1572 self.report_rtmp_download()
dd27fd17
PH
1573 formats = [{
1574 'format_id': '_rtmp',
1575 'protocol': 'rtmp',
1576 'url': video_info['conn'][0],
1577 'player_url': player_url,
1578 }]
24270b03 1579 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
5f6a1245 1580 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1581 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1582 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1583 formats_spec = {}
82156fdb 1584 fmt_list = video_info.get('fmt_list', [''])[0]
1585 if fmt_list:
1586 for fmt in fmt_list.split(','):
1587 spec = fmt.split('/')
3318832e 1588 if len(spec) > 1:
1589 width_height = spec[1].split('x')
1590 if len(width_height) == 2:
1591 formats_spec[spec[0]] = {
1592 'resolution': spec[1],
1593 'width': int_or_none(width_height[0]),
1594 'height': int_or_none(width_height[1]),
1595 }
c9afb51c 1596 formats = []
00fe14fc 1597 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1598 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1599 if 'itag' not in url_data or 'url' not in url_data:
1600 continue
1601 format_id = url_data['itag'][0]
1602 url = url_data['url'][0]
1603
1604 if 'sig' in url_data:
1605 url += '&signature=' + url_data['sig'][0]
1606 elif 's' in url_data:
1607 encrypted_sig = url_data['s'][0]
6449cd80 1608 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
201e9eaa 1609
beb95e77 1610 jsplayer_url_json = self._search_regex(
6449cd80
PH
1611 ASSETS_RE,
1612 embed_webpage if age_gate else video_webpage,
1613 'JS player URL (1)', default=None)
1614 if not jsplayer_url_json and not age_gate:
1615 # We need the embed website after all
1616 if embed_webpage is None:
1617 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1618 embed_webpage = self._download_webpage(
1619 embed_url, video_id, 'Downloading embed webpage')
1620 jsplayer_url_json = self._search_regex(
1621 ASSETS_RE, embed_webpage, 'JS player URL')
1622
beb95e77 1623 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1624 if player_url is None:
1625 player_url_json = self._search_regex(
1626 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1627 video_webpage, 'age gate player URL')
201e9eaa
PH
1628 player_url = json.loads(player_url_json)
1629
1630 if self._downloader.params.get('verbose'):
cf010131 1631 if player_url is None:
201e9eaa
PH
1632 player_version = 'unknown'
1633 player_desc = 'unknown'
1634 else:
1635 if player_url.endswith('swf'):
1636 player_version = self._search_regex(
1637 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1638 'flash player', fatal=False)
201e9eaa 1639 player_desc = 'flash player %s' % player_version
cf010131 1640 else:
201e9eaa 1641 player_version = self._search_regex(
50f84a9a 1642 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
201e9eaa
PH
1643 player_url,
1644 'html5 player', fatal=False)
78caa52a 1645 player_desc = 'html5 player %s' % player_version
201e9eaa 1646
60064c53 1647 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1648 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1649 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1650
1651 signature = self._decrypt_signature(
1652 encrypted_sig, video_id, player_url, age_gate)
1653 url += '&signature=' + signature
1654 if 'ratebypass' not in url:
1655 url += '&ratebypass=yes'
c9afb51c 1656
94278f72
YCH
1657 dct = {
1658 'format_id': format_id,
1659 'url': url,
1660 'player_url': player_url,
1661 }
1662 if format_id in self._formats:
1663 dct.update(self._formats[format_id])
3318832e 1664 if format_id in formats_spec:
1665 dct.update(formats_spec[format_id])
94278f72 1666
aabc2be6
S
1667 # Some itags are not included in DASH manifest thus corresponding formats will
1668 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1669 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1670 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1671 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72
YCH
1672
1673 more_fields = {
c9afb51c 1674 'filesize': int_or_none(url_data.get('clen', [None])[0]),
aabc2be6 1675 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1676 'width': width,
1677 'height': height,
1678 'fps': int_or_none(url_data.get('fps', [None])[0]),
aabc2be6 1679 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
c9afb51c 1680 }
94278f72
YCH
1681 for key, value in more_fields.items():
1682 if value:
1683 dct[key] = value
aabc2be6
S
1684 type_ = url_data.get('type', [None])[0]
1685 if type_:
1686 type_split = type_.split(';')
1687 kind_ext = type_split[0].split('/')
1688 if len(kind_ext) == 2:
94278f72
YCH
1689 kind, _ = kind_ext
1690 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1691 if kind in ('audio', 'video'):
1692 codecs = None
1693 for mobj in re.finditer(
1694 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1695 if mobj.group('key') == 'codecs':
1696 codecs = mobj.group('val')
1697 break
1698 if codecs:
1699 codecs = codecs.split(',')
1700 if len(codecs) == 2:
cc28492d 1701 acodec, vcodec = codecs[1], codecs[0]
aabc2be6
S
1702 else:
1703 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1704 dct.update({
1705 'acodec': acodec,
1706 'vcodec': vcodec,
1707 })
aabc2be6 1708 formats.append(dct)
1d043b93
JMF
1709 elif video_info.get('hlsvp'):
1710 manifest_url = video_info['hlsvp'][0]
1711 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 1712 formats = _map_to_format_list(url_map)
ac5a69af
YCH
1713 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1714 for a_format in formats:
049d71d8 1715 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
c5e8d7af 1716 else:
8ceabd4d
S
1717 unavailable_message = self._html_search_regex(
1718 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1719 video_webpage, 'unavailable message', default=None)
1720 if unavailable_message:
1721 raise ExtractorError(unavailable_message, expected=True)
69ea8ca4 1722 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1723
dd27fd17 1724 # Look for the DASH manifest
203fb43f 1725 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 1726 dash_mpd_fatal = True
8ff648e4 1727 for mpd_url in dash_mpds:
d8d24a92 1728 dash_formats = {}
774e208f 1729 try:
05d0d131
YCH
1730 def decrypt_sig(mobj):
1731 s = mobj.group(1)
1732 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1733 return '/signature/%s' % dec_s
1734
8ff648e4 1735 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 1736
8ff648e4 1737 for df in self._extract_mpd_formats(
1738 mpd_url, video_id, fatal=dash_mpd_fatal,
1739 formats_dict=self._formats):
d8d24a92
S
1740 # Do not overwrite DASH format found in some previous DASH manifest
1741 if df['format_id'] not in dash_formats:
1742 dash_formats[df['format_id']] = df
77c6fb5b
S
1743 # Additional DASH manifests may end up in HTTP Error 403 therefore
1744 # allow them to fail without bug report message if we already have
1745 # some DASH manifest succeeded. This is temporary workaround to reduce
1746 # burst of bug reports until we figure out the reason and whether it
1747 # can be fixed at all.
1748 dash_mpd_fatal = False
774e208f
PH
1749 except (ExtractorError, KeyError) as e:
1750 self.report_warning(
1751 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 1752 if dash_formats:
04b3b3df
JMF
1753 # Remove the formats we found through non-DASH, they
1754 # contain less info and it can be wrong, because we use
1755 # fixed values (for example the resolution). See
1756 # https://github.com/rg3/youtube-dl/issues/5774 for an
1757 # example.
d80265cc 1758 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 1759 formats.extend(dash_formats.values())
d80044c2 1760
6271f1ca
PH
1761 # Check for malformed aspect ratio
1762 stretched_m = re.search(
1763 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1764 video_webpage)
1765 if stretched_m:
313dfc45
LL
1766 w = float(stretched_m.group('w'))
1767 h = float(stretched_m.group('h'))
5faf9fed
S
1768 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1769 # We will only process correct ratios.
313dfc45 1770 if w > 0 and h > 0:
41f24c32 1771 ratio = w / h
313dfc45
LL
1772 for f in formats:
1773 if f.get('vcodec') != 'none':
1774 f['stretched_ratio'] = ratio
6271f1ca 1775
4bcc7bd1 1776 self._sort_formats(formats)
4ea3be0a 1777
d77ab8e2
S
1778 self.mark_watched(video_id, video_info)
1779
4ea3be0a 1780 return {
8bcc8756
JW
1781 'id': video_id,
1782 'uploader': video_uploader,
1783 'uploader_id': video_uploader_id,
fd050249 1784 'uploader_url': video_uploader_url,
8bcc8756 1785 'upload_date': upload_date,
7caf9830 1786 'license': video_license,
0cb58b02 1787 'creator': video_creator,
8bcc8756 1788 'title': video_title,
0cb58b02 1789 'alt_title': video_alt_title,
8bcc8756
JW
1790 'thumbnail': video_thumbnail,
1791 'description': video_description,
1792 'categories': video_categories,
000b6b5a 1793 'tags': video_tags,
8bcc8756 1794 'subtitles': video_subtitles,
360e1ca5 1795 'automatic_captions': automatic_captions,
8bcc8756
JW
1796 'duration': video_duration,
1797 'age_limit': 18 if age_gate else 0,
1798 'annotations': video_annotations,
7e8c0af0 1799 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 1800 'view_count': view_count,
4ea3be0a 1801 'like_count': like_count,
1802 'dislike_count': dislike_count,
2d30521a 1803 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 1804 'formats': formats,
2fe1ff85 1805 'is_live': is_live,
7c80519c 1806 'start_time': start_time,
297a564b 1807 'end_time': end_time,
12afdc2a
S
1808 'series': series,
1809 'season_number': season_number,
1810 'episode_number': episode_number,
4ea3be0a 1811 }
c5e8d7af 1812
5f6a1245 1813
class YoutubeSharedVideoIE(InfoExtractor):
    """Extractor for youtube.com/shared?ci=... links.

    The shared page is downloaded only to read the real video id from
    its 'videoId' meta tag; the actual extraction is handed over to
    YoutubeIE.
    """
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:shared'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing YoutubeIE at the real video id."""
        # The id matched from the URL is the "ci" share token, not a video id.
        share_id = self._match_id(url)
        shared_page = self._download_webpage(url, share_id)

        # fatal=True: without the meta tag there is nothing to delegate to.
        target_video_id = self._html_search_meta(
            'videoId', shared_page, 'YouTube video id', fatal=True)

        delegate_key = YoutubeIE.ie_key()
        return self.url_result(target_video_id, delegate_key)
1845
1846
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, given as a URL or as a bare playlist id.

    Ids starting with RD, UL or PU are treated as mixes and extracted by
    repeatedly loading watch pages; everything else goes through the
    regular playlist page built from _TEMPLATE_URL.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Collect the videos of a mix by walking its watch pages.

        Starting from the video id formed by the last 11 characters of
        playlist_id, each watch page is scanned for video ids belonging to
        the mix; the last newly found id seeds the next request. The walk
        stops at the first page that contributes no unseen id.

        Returns a playlist result titled from the last downloaded page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                watch_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = []
            for candidate in orderedSet(re.findall(mix_entry_re, webpage)):
                if candidate not in collected_ids:
                    unseen_ids.append(candidate)
            if not unseen_ids:
                break
            collected_ids.extend(unseen_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Try the known title containers in order of preference; when none
        # matches, whatever the last lookup returned is passed on as is.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build a lazy playlist result.

        Returns a (has_videos, playlist) tuple. has_videos is False only
        when the page has no playlist title and yields no entry at all.

        Raises ExtractorError (expected) when the page carries an alert
        saying the playlist does not exist, is private, or that the
        parameters are invalid.
        """
        page = self._download_webpage(
            self._TEMPLATE_URL % playlist_id, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for raw_alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = raw_alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                pieces = ['This playlist %s' % reason]
                if 'private' in reason:
                    pieces.append(', use --username or --netrc to access it')
                pieces.append('.')
                raise ExtractorError(''.join(pieces), expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                # Harmless language selection banner
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            probe = self._entries(page, playlist_id)
            try:
                next(probe)
            except StopIteration:
                has_videos = False

        # A fresh entries iterator is created here; the probe above is
        # only used to find out whether there is at least one entry.
        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Detect a video id in a playlist URL and honour --no-playlist.

        Returns a (video_id, video_result) tuple:
        (None, None) when the URL names no video;
        (video_id, url_result) when 'noplaylist' is set;
        (video_id, None) when the playlist should be downloaded.
        """
        # Check if it's a video-specific URL
        parsed_query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = parsed_query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
                'video id', default=None)

        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Dispatch to single video, mix or regular playlist extraction."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, single_video = self._check_download_just_video(url, playlist_id)
        if single_video:
            return single_video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/rg3/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2111
c5e8d7af 2112
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for youtube.com/channel/<id> pages.

    Whenever a 'UC...' channel id can be read from the channel page, the
    work is delegated to the matching 'UU...' uploads playlist; otherwise
    the channel's video listing pages are extracted directly.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubePlaylistsIE or YoutubeLiveIE accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the video listing URL for channel_id (url is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferably through its uploads playlist.

        Raises ExtractorError (expected) when the listing page yields no
        entry and shows a yt-alert-message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        # A failed (non-fatal) download comes back as False
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Second chance: pull the id out of the app deep link metas
                app_link = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_link:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_link, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to address the uploads playlist
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; an empty listing may come with an alert
        # explaining why, which is reported instead of an empty playlist.
        probe = self._entries(channel_page, channel_id)
        try:
            next(probe)
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(
            self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2202
2203
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for YouTube user pages.

    Matches ``/user/<name>``, ``/c/<name>`` and bare ``/<name>`` URLs as well
    as the ``ytuser:<name>`` keyword (see ``_VALID_URL``).
    """

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    # Filled with (path kind, user id), see _build_template_url()
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other Youtube*IE in this module accepts url.

        _VALID_URL is too permissive and would also match URLs that can be
        extracted by another YouTube extractor, so those get priority.
        """
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos page URL from the path kind and id found in url.

        The path kind falls back to 'user' when the URL does not carry an
        explicit ``user``/``c`` component; ``channel_id`` is not used here.
        """
        match = re.match(self._VALID_URL, url)
        path_kind = match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, match.group('id'))
2254
b05654f0 2255
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``<channel or user URL>/live`` pages."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the video it shows, else to the base URL.

        The page is fetched non-fatally. When it is a video page exposing a
        well-formed 11-character video id, the result points at that video;
        in every other case the URL without the trailing ``/live`` is
        returned for further processing.
        """
        channel_id, base_url = re.match(self._VALID_URL, url).group('id', 'base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default=None)
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type == 'video'
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2305
2306
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user or channel.

    Purely declarative: the extraction logic is inherited from the base
    class, this class only supplies the URL pattern and the tests.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
0c148415
S
2335
2336
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for the ``ytsearch`` keyword (YouTube search results)."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra parameters merged into the search URL query; overridden by
    # subclasses that need a different ordering.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until at least ``n`` videos were collected, a page
        yields no videos, or there is no next page.

        :param query: the search terms
        :param n: maximum number of results to return
        :returns: a playlist result holding at most ``n`` entries
        :raises ExtractorError: when the page reports no video results
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are collected: with a strict
            # ">" an extra page would be downloaded (and then thrown away)
            # whenever exactly n results had been gathered.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past n
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2386
c9ae7b95 2387
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that asks for newest videos first."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Added to the search URL query by the inherited search code
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2393
c9ae7b95 2394
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``youtube.com/results?...`` search page URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'

    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the search page and return its videos as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2416
2417
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``youtube.com/show/<id>`` pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'

    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2435
2436
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors

    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties:
    _FEED_NAME is used to build both IE_NAME and the feed URL, and
    _PLAYLIST_TITLE is used as the title of the resulting playlist.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page by page.

        ``url`` itself is not used; the feed URL is built from _FEED_NAME.
        Paging follows the "load more" widget and stops when a portion
        brings no new video ids or when there is no further widget link.

        :returns: a playlist result titled _PLAYLIST_TITLE
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never trigger and the loop would not terminate here.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2484
2485
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id ``WL``)."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the WL playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _, watch_later = self._extract_playlist('WL')
        return watch_later
f459d170 2505
5f6a1245 2506
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites of the logged-in account."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and hand it to YoutubePlaylist.

        ``url`` is not used: the fixed my_favorites page is always fetched.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
2517
2518
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'recommended' feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2524
1ed5b5c9 2525
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'subscriptions' feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2531
1ed5b5c9 2532
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'history' feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2538
2539
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id and explain why.

    Such URLs typically appear when an unquoted ``&`` made the shell cut the
    URL short; extraction always fails with an explanatory error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
772fd5cc
PH
2587
2588
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)