]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[facebook] fix tahoe request(closes #17171)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
8d81f3e3 19 compat_kwargs,
c5e8d7af 20 compat_parse_qs,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
c5e8d7af 26 compat_str,
4bb4a188
PH
27)
28from ..utils import (
c5e8d7af 29 clean_html,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
54fc90aa 40 qualities,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
81c2f20b 50 uppercase_escape,
6e6bc8da 51 urlencode_postdata,
c5e8d7af
PH
52)
53
5f6a1245 54
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req to url.

            Returns whatever self._download_json returns; it is called with
            fatal=False and callers below compare the result against False.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' before parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the
        # challenge request payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so that the conditional picks the detail text
            # only; '%' binds tighter than 'if/else', which previously
            # dropped the 'Unable to login' prefix for any other message.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the login warning above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true
        added to the 'query' keyword argument.

        The caller's query mapping is copied, never mutated. A missing or
        None 'query' is treated as an empty mapping.
        """
        query = dict(kwargs.get('query') or {})
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        self._set_language()
        # The result is deliberately ignored: a failed login is not fatal here
        self._login()
c5e8d7af 275
8377574c 276
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from page and from every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page. The next chunk is
        fetched from the data-uix-load-more-href link found in the most
        recent load-more widget HTML. Iteration ends when no such link is
        present or the fetched content is blank.
        """
        current_html = page
        widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            next_url = 'https://youtube.com/%s' % load_more.group('more')
            response = self._download_json(
                next_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
299
061a75ed
S
300
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every video found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs matched by self._VIDEO_RE in page.

        Ids are de-duplicated and kept in first-seen order. When an id is
        matched again, its title is filled in only if the title stored so
        far is empty.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps each id to its position in the two lists above, so a repeated
        # id is found without rescanning ids_in_page for every match
        position_by_id = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', against
            # '0' - looks unintended, but kept as is; confirm before changing
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_by_id.get(video_id)
            if idx is None:
                position_by_id[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)
325
326
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist
        id linked from a yt-lockup-title heading in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for list_id in orderedSet(found_ids):
            list_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(list_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries,
        titled with the page's og title when one is found."""
        list_id = self._match_id(url)
        page_html = self._download_webpage(url, list_id)
        og_title = self._og_search_title(page_html, fatal=False)
        entries = self._entries(page_html, list_id)
        return self.playlist_result(entries, list_id, og_title)
0c148415
S
340
341
360e1ca5 342class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 343 IE_DESC = 'YouTube.com'
cb7dfeea 344 _VALID_URL = r"""(?x)^
c5e8d7af 345 (
edb53e2d 346 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 347 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 348 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 349 (?:www\.)?pwnyoutube\.com/|
8b561bfc 350 (?:www\.)?hooktube\.com/|
f7000f3a 351 (?:www\.)?yourepeat\.com/|
e69ae5b9 352 tube\.majestyc\.net/|
cd5a74a2 353 (?:www\.)?invidio\.us/|
e69ae5b9 354 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
355 (?:.*?\#/)? # handle anchor (#/) redirect urls
356 (?: # the various things that can precede the ID:
ac7553d0 357 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 358 |(?: # or the v= param in all its forms
f7000f3a 359 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 360 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 361 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
362 v=
363 )
f4b05232 364 ))
cbaed4bb
S
365 |(?:
366 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
367 vid\.plus| # or vid.plus/xxxx
368 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 369 )/
edb53e2d 370 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 371 )
c5e8d7af 372 )? # all until now is optional -> you can pass the naked ID
8963d9c2 373 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
374 (?!.*?\blist=
375 (?:
376 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
377 WL # WL are handled by the watch later IE
378 )
379 )
c5e8d7af 380 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 381 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 382 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 383 _formats = {
c2d3cb4c 384 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
385 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
386 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
387 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
388 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
389 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
390 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
391 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 392 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 393 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
394 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
395 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
396 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
397 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
398 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 399 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 400 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
401 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 402
403
404 # 3D videos
c2d3cb4c 405 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
406 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
407 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
408 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 409 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
410 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
411 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 412
96fb5605 413 # Apple HTTP Live Streaming
11f12195 414 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 415 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
416 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
417 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
418 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
419 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 420 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
421 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
422
423 # DASH mp4 video
d23028a8
S
424 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
425 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
426 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
427 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
428 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
429 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
430 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
431 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
432 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
433 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
434 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
435 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 436
f6f1fc92 437 # Dash mp4 audio
d23028a8
S
438 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
439 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
440 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
441 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
442 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
443 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
444 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
445
446 # Dash webm
d23028a8
S
447 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
448 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
449 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
450 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
451 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
452 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
453 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
454 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
456 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
458 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
459 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
461 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 462 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
463 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
465 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
466 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
467 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
468 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
469
470 # Dash webm audio
d23028a8
S
471 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
472 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 473
0857baad 474 # Dash webm audio with opus inside
d23028a8
S
475 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
476 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
477 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 478
ce6b9a2d
PH
479 # RTMP (unnamed)
480 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 481 }
23d17e4b 482 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 483
fd5c4aab
S
484 _GEO_BYPASS = False
485
78caa52a 486 IE_NAME = 'youtube'
2eb88d95
PH
487 _TESTS = [
488 {
2d3d2997 489 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
490 'info_dict': {
491 'id': 'BaW_jenozKc',
492 'ext': 'mp4',
493 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
494 'uploader': 'Philipp Hagemeister',
495 'uploader_id': 'phihag',
ec85ded8 496 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
497 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
498 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 499 'upload_date': '20121002',
7caf9830 500 'license': 'Standard YouTube License',
4bc3a23e
PH
501 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
502 'categories': ['Science & Technology'],
000b6b5a 503 'tags': ['youtube-dl'],
556dbe7f 504 'duration': 10,
dbdaaa23 505 'view_count': int,
3e7c1224
PH
506 'like_count': int,
507 'dislike_count': int,
7c80519c 508 'start_time': 1,
297a564b 509 'end_time': 9,
2eb88d95 510 }
0e853ca4 511 },
0e853ca4 512 {
2d3d2997 513 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
514 'note': 'Test generic use_cipher_signature video (#897)',
515 'info_dict': {
516 'id': 'UxxajLWwzqY',
517 'ext': 'mp4',
518 'upload_date': '20120506',
519 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 520 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 521 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
522 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
523 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
524 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 525 'duration': 180,
4bc3a23e
PH
526 'uploader': 'Icona Pop',
527 'uploader_id': 'IconaPop',
ec85ded8 528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 529 'license': 'Standard YouTube License',
0cb58b02 530 'creator': 'Icona Pop',
936784b2
S
531 'track': 'I Love It (feat. Charli XCX)',
532 'artist': 'Icona Pop',
2eb88d95 533 }
c108eb73
JMF
534 },
535 {
4bc3a23e
PH
536 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
537 'note': 'Test VEVO video with age protection (#956)',
538 'info_dict': {
539 'id': '07FYdnEawAQ',
540 'ext': 'mp4',
541 'upload_date': '20130703',
542 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 543 'alt_title': 'Tunnel Vision',
4bc3a23e 544 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 545 'duration': 419,
4bc3a23e
PH
546 'uploader': 'justintimberlakeVEVO',
547 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 549 'license': 'Standard YouTube License',
0cb58b02 550 'creator': 'Justin Timberlake',
7e72694b 551 'track': 'Tunnel Vision',
936784b2 552 'artist': 'Justin Timberlake',
34952f09 553 'age_limit': 18,
c108eb73
JMF
554 }
555 },
fccd3771 556 {
4bc3a23e
PH
557 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
558 'note': 'Embed-only video (#1746)',
559 'info_dict': {
560 'id': 'yZIXLfi8CZQ',
561 'ext': 'mp4',
562 'upload_date': '20120608',
563 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
564 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
565 'uploader': 'SET India',
94bfcd23 566 'uploader_id': 'setindia',
ec85ded8 567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 568 'license': 'Standard YouTube License',
94bfcd23 569 'age_limit': 18,
fccd3771
PH
570 }
571 },
11b56058 572 {
2d3d2997 573 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
574 'note': 'Use the first video ID in the URL',
575 'info_dict': {
576 'id': 'BaW_jenozKc',
577 'ext': 'mp4',
578 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
579 'uploader': 'Philipp Hagemeister',
580 'uploader_id': 'phihag',
ec85ded8 581 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 582 'upload_date': '20121002',
7caf9830 583 'license': 'Standard YouTube License',
11b56058
PM
584 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
585 'categories': ['Science & Technology'],
586 'tags': ['youtube-dl'],
556dbe7f 587 'duration': 10,
dbdaaa23 588 'view_count': int,
11b56058
PM
589 'like_count': int,
590 'dislike_count': int,
34a7de29
S
591 },
592 'params': {
593 'skip_download': True,
594 },
11b56058 595 },
dd27fd17 596 {
2d3d2997 597 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
598 'note': '256k DASH audio (format 141) via DASH manifest',
599 'info_dict': {
600 'id': 'a9LDPn-MO4I',
601 'ext': 'm4a',
602 'upload_date': '20121002',
603 'uploader_id': '8KVIDEO',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
605 'description': '',
606 'uploader': '8KVIDEO',
7caf9830 607 'license': 'Standard YouTube License',
4bc3a23e 608 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 609 },
4bc3a23e
PH
610 'params': {
611 'youtube_include_dash_manifest': True,
612 'format': '141',
4919603f 613 },
de3c7fe0 614 'skip': 'format 141 not served anymore',
dd27fd17 615 },
3489b7d2
JMF
616 # DASH manifest with encrypted signature
617 {
78caa52a
PH
618 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
619 'info_dict': {
620 'id': 'IB3lcPjvWLA',
621 'ext': 'm4a',
b766eb27 622 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
eb6793ba 623 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
556dbe7f 624 'duration': 244,
78caa52a
PH
625 'uploader': 'AfrojackVEVO',
626 'uploader_id': 'AfrojackVEVO',
627 'upload_date': '20131011',
7caf9830 628 'license': 'Standard YouTube License',
3489b7d2 629 },
4bc3a23e 630 'params': {
78caa52a 631 'youtube_include_dash_manifest': True,
de3c7fe0 632 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
633 },
634 },
aaeb86f6
S
635 # JS player signature function name containing $
636 {
637 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
638 'info_dict': {
639 'id': 'nfWlot6h_JM',
640 'ext': 'm4a',
641 'title': 'Taylor Swift - Shake It Off',
0cb58b02 642 'alt_title': 'Shake It Off',
f57b7835 643 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 644 'duration': 242,
aaeb86f6
S
645 'uploader': 'TaylorSwiftVEVO',
646 'uploader_id': 'TaylorSwiftVEVO',
647 'upload_date': '20140818',
7caf9830 648 'license': 'Standard YouTube License',
0cb58b02 649 'creator': 'Taylor Swift',
aaeb86f6
S
650 },
651 'params': {
652 'youtube_include_dash_manifest': True,
de3c7fe0 653 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
654 },
655 },
aa79ac0c
PH
656 # Controversy video
657 {
658 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
659 'info_dict': {
660 'id': 'T4XJQO3qol8',
661 'ext': 'mp4',
556dbe7f 662 'duration': 219,
aa79ac0c 663 'upload_date': '20100909',
eb6793ba 664 'uploader': 'TJ Kirk',
aa79ac0c 665 'uploader_id': 'TheAmazingAtheist',
ec85ded8 666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 667 'license': 'Standard YouTube License',
aa79ac0c
PH
668 'title': 'Burning Everyone\'s Koran',
669 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
670 }
c522adb1
JMF
671 },
672 # Normal age-gate video (No vevo, embed allowed)
673 {
2d3d2997 674 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
675 'info_dict': {
676 'id': 'HtVdAasjOgU',
677 'ext': 'mp4',
678 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 679 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 680 'duration': 142,
c522adb1
JMF
681 'uploader': 'The Witcher',
682 'uploader_id': 'WitcherGame',
ec85ded8 683 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 684 'upload_date': '20140605',
7caf9830 685 'license': 'Standard YouTube License',
34952f09 686 'age_limit': 18,
c522adb1
JMF
687 },
688 },
fccae2b9
S
689 # Age-gate video with encrypted signature
690 {
2d3d2997 691 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
692 'info_dict': {
693 'id': '6kLq3WMV1nU',
eb6793ba 694 'ext': 'webm',
fccae2b9
S
695 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
696 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 697 'duration': 246,
fccae2b9
S
698 'uploader': 'LloydVEVO',
699 'uploader_id': 'LloydVEVO',
ec85ded8 700 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 701 'upload_date': '20110629',
7caf9830 702 'license': 'Standard YouTube License',
34952f09 703 'age_limit': 18,
fccae2b9
S
704 },
705 },
774e208f 706 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
7d02dcfa 707 # YouTube Red ad is not captured for creator
774e208f
PH
708 {
709 'url': '__2ABJjxzNo',
710 'info_dict': {
711 'id': '__2ABJjxzNo',
712 'ext': 'mp4',
556dbe7f 713 'duration': 266,
774e208f
PH
714 'upload_date': '20100430',
715 'uploader_id': 'deadmau5',
ec85ded8 716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 717 'creator': 'deadmau5',
774e208f
PH
718 'description': 'md5:12c56784b8032162bb936a5f76d55360',
719 'uploader': 'deadmau5',
7caf9830 720 'license': 'Standard YouTube License',
774e208f 721 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 722 'alt_title': 'Some Chords',
774e208f
PH
723 },
724 'expected_warnings': [
725 'DASH manifest missing',
726 ]
e52a40ab
PH
727 },
728 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
729 {
730 'url': 'lqQg6PlCWgI',
731 'info_dict': {
732 'id': 'lqQg6PlCWgI',
733 'ext': 'mp4',
556dbe7f 734 'duration': 6085,
90227264 735 'upload_date': '20150827',
cbe2bd91 736 'uploader_id': 'olympic',
ec85ded8 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 738 'license': 'Standard YouTube License',
cbe2bd91 739 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 740 'uploader': 'Olympic',
cbe2bd91
PH
741 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
742 },
743 'params': {
744 'skip_download': 'requires avconv',
e52a40ab 745 }
cbe2bd91 746 },
6271f1ca
PH
747 # Non-square pixels
748 {
749 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
750 'info_dict': {
751 'id': '_b-2C3KPAM0',
752 'ext': 'mp4',
753 'stretched_ratio': 16 / 9.,
556dbe7f 754 'duration': 85,
6271f1ca
PH
755 'upload_date': '20110310',
756 'uploader_id': 'AllenMeow',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 758 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 759 'uploader': '孫ᄋᄅ',
7caf9830 760 'license': 'Standard YouTube License',
6271f1ca
PH
761 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
762 },
06b491eb
S
763 },
764 # url_encoded_fmt_stream_map is empty string
765 {
766 'url': 'qEJwOuvDf7I',
767 'info_dict': {
768 'id': 'qEJwOuvDf7I',
f57b7835 769 'ext': 'webm',
06b491eb
S
770 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
771 'description': '',
772 'upload_date': '20150404',
773 'uploader_id': 'spbelect',
774 'uploader': 'Наблюдатели Петербурга',
775 },
776 'params': {
777 'skip_download': 'requires avconv',
e323cf3f
S
778 },
779 'skip': 'This live event has ended.',
06b491eb 780 },
da77d856
S
781 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
782 {
783 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
784 'info_dict': {
785 'id': 'FIl7x6_3R5Y',
eb6793ba 786 'ext': 'webm',
da77d856
S
787 'title': 'md5:7b81415841e02ecd4313668cde88737a',
788 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 789 'duration': 220,
da77d856
S
790 'upload_date': '20150625',
791 'uploader_id': 'dorappi2000',
ec85ded8 792 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 793 'uploader': 'dorappi2000',
7caf9830 794 'license': 'Standard YouTube License',
eb6793ba 795 'formats': 'mincount:31',
da77d856 796 },
eb6793ba 797 'skip': 'not actual anymore',
2ee8f5d8 798 },
8a1a26ce
YCH
799 # DASH manifest with segment_list
800 {
801 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
802 'md5': '8ce563a1d667b599d21064e982ab9e31',
803 'info_dict': {
804 'id': 'CsmdDsKjzN8',
805 'ext': 'mp4',
17ee98e1 806 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
807 'uploader': 'Airtek',
808 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
809 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 810 'license': 'Standard YouTube License',
8a1a26ce
YCH
811 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
812 },
813 'params': {
814 'youtube_include_dash_manifest': True,
815 'format': '135', # bestvideo
be49068d
S
816 },
817 'skip': 'This live event has ended.',
2ee8f5d8 818 },
cf7e015f
S
819 {
820 # Multifeed videos (multiple cameras), URL is for Main Camera
821 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
822 'info_dict': {
823 'id': 'jqWvoWXjCVs',
824 'title': 'teamPGP: Rocket League Noob Stream',
825 'description': 'md5:dc7872fb300e143831327f1bae3af010',
826 },
827 'playlist': [{
828 'info_dict': {
829 'id': 'jqWvoWXjCVs',
830 'ext': 'mp4',
831 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
832 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 833 'duration': 7335,
cf7e015f
S
834 'upload_date': '20150721',
835 'uploader': 'Beer Games Beer',
836 'uploader_id': 'beergamesbeer',
ec85ded8 837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 838 'license': 'Standard YouTube License',
cf7e015f
S
839 },
840 }, {
841 'info_dict': {
842 'id': '6h8e8xoXJzg',
843 'ext': 'mp4',
844 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
845 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 846 'duration': 7337,
cf7e015f
S
847 'upload_date': '20150721',
848 'uploader': 'Beer Games Beer',
849 'uploader_id': 'beergamesbeer',
ec85ded8 850 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 851 'license': 'Standard YouTube License',
cf7e015f
S
852 },
853 }, {
854 'info_dict': {
855 'id': 'PUOgX5z9xZw',
856 'ext': 'mp4',
857 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
858 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 859 'duration': 7337,
cf7e015f
S
860 'upload_date': '20150721',
861 'uploader': 'Beer Games Beer',
862 'uploader_id': 'beergamesbeer',
ec85ded8 863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 864 'license': 'Standard YouTube License',
cf7e015f
S
865 },
866 }, {
867 'info_dict': {
868 'id': 'teuwxikvS5k',
869 'ext': 'mp4',
870 'title': 'teamPGP: Rocket League Noob Stream (zim)',
871 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 872 'duration': 7334,
cf7e015f
S
873 'upload_date': '20150721',
874 'uploader': 'Beer Games Beer',
875 'uploader_id': 'beergamesbeer',
ec85ded8 876 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 877 'license': 'Standard YouTube License',
cf7e015f
S
878 },
879 }],
880 'params': {
881 'skip_download': True,
882 },
cbaed4bb 883 },
f9f49d87
S
884 {
885 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
886 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
887 'info_dict': {
888 'id': 'gVfLd0zydlo',
889 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
890 },
891 'playlist_count': 2,
be49068d 892 'skip': 'Not multifeed anymore',
f9f49d87 893 },
cbaed4bb 894 {
2d3d2997 895 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 896 'only_matching': True,
0e49d9a6 897 },
6d4fc66b 898 {
2d3d2997 899 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
900 'only_matching': True,
901 },
0e49d9a6 902 {
61f92af1 903 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
904 # Also tests cut-off URL expansion in video description (see
905 # https://github.com/rg3/youtube-dl/issues/1892,
906 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
907 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
908 'info_dict': {
909 'id': 'lsguqyKfVQg',
910 'ext': 'mp4',
911 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 912 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 913 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 914 'duration': 133,
0e49d9a6
LL
915 'upload_date': '20151119',
916 'uploader_id': 'IronSoulElf',
ec85ded8 917 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 918 'uploader': 'IronSoulElf',
7caf9830 919 'license': 'Standard YouTube License',
eb6793ba
S
920 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
921 'track': 'Dark Walk - Position Music',
922 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
0e49d9a6
LL
923 },
924 'params': {
925 'skip_download': True,
926 },
927 },
61f92af1
S
928 {
929 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
930 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
931 'only_matching': True,
932 },
313dfc45
LL
933 {
934 # Video with yt:stretch=17:0
935 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
936 'info_dict': {
937 'id': 'Q39EVAstoRM',
938 'ext': 'mp4',
939 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
940 'description': 'md5:ee18a25c350637c8faff806845bddee9',
941 'upload_date': '20151107',
942 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
943 'uploader': 'CH GAMER DROID',
944 },
945 'params': {
946 'skip_download': True,
947 },
be49068d 948 'skip': 'This video does not exist.',
313dfc45 949 },
7caf9830
S
950 {
951 # Video licensed under Creative Commons
952 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
953 'info_dict': {
954 'id': 'M4gD1WSo5mA',
955 'ext': 'mp4',
956 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
957 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 958 'duration': 721,
7caf9830
S
959 'upload_date': '20150127',
960 'uploader_id': 'BerkmanCenter',
ec85ded8 961 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 962 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
963 'license': 'Creative Commons Attribution license (reuse allowed)',
964 },
965 'params': {
966 'skip_download': True,
967 },
968 },
fd050249
S
969 {
970 # Channel-like uploader_url
971 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
972 'info_dict': {
973 'id': 'eQcmzGIKrzg',
974 'ext': 'mp4',
975 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
976 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 977 'duration': 4060,
fd050249 978 'upload_date': '20151119',
eb6793ba 979 'uploader': 'Bernie Sanders',
fd050249 980 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 981 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
982 'license': 'Creative Commons Attribution license (reuse allowed)',
983 },
984 'params': {
985 'skip_download': True,
986 },
987 },
040ac686
S
988 {
989 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
990 'only_matching': True,
7f29cf54
S
991 },
992 {
993 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
994 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
995 'only_matching': True,
6496ccb4
S
996 },
997 {
998 # Rental video preview
999 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1000 'info_dict': {
1001 'id': 'uGpuVWrhIzE',
1002 'ext': 'mp4',
1003 'title': 'Piku - Trailer',
1004 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1005 'upload_date': '20150811',
1006 'uploader': 'FlixMatrix',
1007 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1008 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1009 'license': 'Standard YouTube License',
1010 },
1011 'params': {
1012 'skip_download': True,
1013 },
eb6793ba 1014 'skip': 'This video is not available.',
022a5d66 1015 },
12afdc2a
S
1016 {
1017 # YouTube Red video with episode data
1018 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1019 'info_dict': {
1020 'id': 'iqKdEhx-dD4',
1021 'ext': 'mp4',
1022 'title': 'Isolation - Mind Field (Ep 1)',
eb6793ba 1023 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
556dbe7f 1024 'duration': 2085,
12afdc2a
S
1025 'upload_date': '20170118',
1026 'uploader': 'Vsauce',
1027 'uploader_id': 'Vsauce',
1028 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1029 'license': 'Standard YouTube License',
1030 'series': 'Mind Field',
1031 'season_number': 1,
1032 'episode_number': 1,
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
1037 'expected_warnings': [
1038 'Skipping DASH manifest',
1039 ],
1040 },
c7121fa7
S
1041 {
1042 # The following content has been identified by the YouTube community
1043 # as inappropriate or offensive to some audiences.
1044 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1045 'info_dict': {
1046 'id': '6SJNVb0GnPI',
1047 'ext': 'mp4',
1048 'title': 'Race Differences in Intelligence',
1049 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1050 'duration': 965,
1051 'upload_date': '20140124',
1052 'uploader': 'New Century Foundation',
1053 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1054 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1055 'license': 'Standard YouTube License',
c7121fa7
S
1056 },
1057 'params': {
1058 'skip_download': True,
1059 },
1060 },
022a5d66
S
1061 {
1062 # itag 212
1063 'url': '1t24XAntNCY',
1064 'only_matching': True,
fd5c4aab
S
1065 },
1066 {
1067 # geo restricted to JP
1068 'url': 'sJL6WA-aGkQ',
1069 'only_matching': True,
1070 },
d0ba5587
S
1071 {
1072 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1073 'only_matching': True,
1074 },
cd5a74a2
S
1075 {
1076 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1077 'only_matching': True,
1078 },
2eb88d95
PH
1079 ]
1080
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialize the extractor and create an empty player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature layout id) to the signature function
    # extracted for that player; filled in by _decrypt_signature.
    self._player_cache = dict()
e0df6211 1084
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1088
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1092
def report_unavailable_format(self, video_id, format):
    """Print a notice that the requested format is not available.

    video_id and format are only interpolated into the message.
    """
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1096
def report_rtmp_download(self):
    """Print a notice that the download will go through RTMP."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
c5e8d7af 1100
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the layout of a signature as its dot-joined part lengths.

    The signature is split on '.', and the length of every part is
    rendered as text and joined with '.' again.
    """
    part_lengths = [len(chunk) for chunk in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53
PH
1104
1105 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 1106 id_m = re.match(
e31fed95 1107 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
cf010131 1108 player_url)
c081b35c
PH
1109 if not id_m:
1110 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
1111 player_type = id_m.group('ext')
1112 player_id = id_m.group('id')
1113
c4417ddb 1114 # Read from filesystem cache
60064c53
PH
1115 func_id = '%s_%s_%s' % (
1116 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1117 assert os.path.basename(func_id) == func_id
a0e07d31 1118
69ea8ca4 1119 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1120 if cache_spec is not None:
78caa52a 1121 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1122
6d1a55a5
PH
1123 download_note = (
1124 'Downloading player %s' % player_url
1125 if self._downloader.params.get('verbose') else
1126 'Downloading %s player %s' % (player_type, player_id)
1127 )
e0df6211
PH
1128 if player_type == 'js':
1129 code = self._download_webpage(
1130 player_url, video_id,
6d1a55a5 1131 note=download_note,
69ea8ca4 1132 errnote='Download of %s failed' % player_url)
83799698 1133 res = self._parse_sig_js(code)
c4417ddb 1134 elif player_type == 'swf':
e0df6211
PH
1135 urlh = self._request_webpage(
1136 player_url, video_id,
6d1a55a5 1137 note=download_note,
69ea8ca4 1138 errnote='Download of %s failed' % player_url)
e0df6211 1139 code = urlh.read()
83799698 1140 res = self._parse_sig_swf(code)
e0df6211
PH
1141 else:
1142 assert False, 'Invalid player type %r' % player_type
1143
785521bf
PH
1144 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1145 cache_res = res(test_string)
1146 cache_spec = [ord(c) for c in cache_res]
83799698 1147
69ea8ca4 1148 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1149 return res
1150
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    func is run on a probe string as long as example_sig; the resulting
    character permutation is rendered as a sum of index and slice
    expressions on ``s`` and printed through self.to_screen, guarded by a
    check on the lengths of the dot-separated parts of example_sig.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions covering idxs in order, merging runs
        # of indices that rise or fall by exactly one into a single slice.
        def _genslice(start, end, step):
            # Render the slice from start up to and including end.
            starts = '' if start == 0 else str(start)
            # The exclusive bound is end + step; it is left out when it
            # would be negative.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is open, otherwise +1 or -1.
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run.
                    continue
                # Run ended at prev: emit it and start afresh.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string whose characters encode their own positions.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1189
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the source of a JS player.

    The name of the signature function is located in jscode with a
    series of regular expressions, the function is extracted with
    JSInterpreter, and a wrapper taking the signature string as its only
    argument is returned.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(sig_func_name)

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
1201
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter, and a wrapper taking the signature string as its
    only argument is returned.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher = interpreter.extract_function(
        interpreter.extract_class(TARGET_CLASSNAME), 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
1208
83799698 1209 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1210 """Turn the encrypted s field into a working signature"""
6b37f0be 1211
c8bf86d5 1212 if player_url is None:
69ea8ca4 1213 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1214
69ea8ca4 1215 if player_url.startswith('//'):
78caa52a 1216 player_url = 'https:' + player_url
3c90cc8b
S
1217 elif not re.match(r'https?://', player_url):
1218 player_url = compat_urlparse.urljoin(
1219 'https://www.youtube.com', player_url)
c8bf86d5 1220 try:
62af3a0e 1221 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1222 if player_id not in self._player_cache:
1223 func = self._extract_signature_function(
60064c53 1224 video_id, player_url, s
c8bf86d5
PH
1225 )
1226 self._player_cache[player_id] = func
1227 func = self._player_cache[player_id]
1228 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1229 self._print_sig_code(func, s)
c8bf86d5
PH
1230 return func(s)
1231 except Exception as e:
1232 tb = traceback.format_exc()
1233 raise ExtractorError(
78caa52a 1234 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1235
360e1ca5 1236 def _get_subtitles(self, video_id, webpage):
de7f3446 1237 try:
60e47a26 1238 subs_doc = self._download_xml(
38c2e5b8 1239 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1240 video_id, note=False)
1241 except ExtractorError as err:
9b9c5355 1242 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1243 return {}
de7f3446
JMF
1244
1245 sub_lang_list = {}
60e47a26
JMF
1246 for track in subs_doc.findall('track'):
1247 lang = track.attrib['lang_code']
7e660ac1
LD
1248 if lang in sub_lang_list:
1249 continue
360e1ca5 1250 sub_formats = []
23d17e4b 1251 for ext in self._SUBTITLE_FORMATS:
15707c7e 1252 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1253 'lang': lang,
1254 'v': video_id,
1255 'fmt': ext,
1256 'name': track.attrib['name'].encode('utf-8'),
1257 })
1258 sub_formats.append({
1259 'url': 'https://www.youtube.com/api/timedtext?' + params,
1260 'ext': ext,
1261 })
1262 sub_lang_list[lang] = sub_formats
de7f3446 1263 if not sub_lang_list:
69ea8ca4 1264 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1265 return {}
1266 return sub_lang_list
1267
a72778d3
S
1268 def _get_ytplayer_config(self, video_id, webpage):
1269 patterns = (
526b3b07
S
1270 # User data may contain arbitrary character sequences that may affect
1271 # JSON extraction with regex, e.g. when '};' is contained the second
1272 # regex won't capture the whole JSON. Yet working around by trying more
1273 # concrete regex first keeping in mind proper quoted string handling
1274 # to be implemented in future that will replace this workaround (see
1275 # https://github.com/rg3/youtube-dl/issues/7468,
1276 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1277 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1278 r';ytplayer\.config\s*=\s*({.+?});',
1279 )
1280 config = self._search_regex(
1281 patterns, webpage, 'ytplayer.config', default=None)
1282 if config:
1283 return self._parse_json(
1284 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1285
def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic captions available for *video_id*.

    The already downloaded webpage is passed in so that the player
    config holding the caption URLs does not have to be fetched again.

    Three sources are tried in order: the 'ttsurl' player argument, the
    'player_response' player argument, and the 'caption_tracks' /
    'caption_translation_languages' player arguments.  The result maps
    each language code to a list of dicts with 'url' and 'ext' keys, one
    per entry of self._SUBTITLE_FORMATS.  An empty dict is returned,
    after a warning, when no captions can be determined.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first 'track' node provides the source language for
            # every 'target' language listed below.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Derive one URL per language and format from sub_url by
            # overriding its 'tlang' and 'fmt' query parameters.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                # Only the first caption track is used as the base URL.
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1387
d77ab8e2
S
1388 def _mark_watched(self, video_id, video_info):
1389 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1390 if not playback_url:
1391 return
1392 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1393 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1394
1395 # cpn generation algorithm is reverse engineered from base.js.
1396 # In fact it works even with dummy cpn.
1397 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1398 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1399
1400 qs.update({
1401 'ver': ['2'],
1402 'cpn': [cpn],
1403 })
1404 playback_url = compat_urlparse.urlunparse(
15707c7e 1405 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1406
1407 self._download_webpage(
1408 playback_url, video_id, 'Marking watched',
1409 'Unable to mark watched', fatal=False)
1410
66c9fa36
S
1411 @staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Three kinds of embeds are looked for, in this order: player URLs in
    iframe/embed/object/SWFObject style markup, video ids of lazyYT
    elements, and video ids of the Wordpress "YouTube Video Importer"
    plugin.  Returns a list of strings (player URLs or bare video ids),
    empty when nothing is found.
    """
    # Embedded YouTube player
    # The embedSWF alternative accepts an optional '(' and an optional
    # ':'.  It used to read `embedSWF\(?:\s*`, a malformed non-capturing
    # group that demanded a literal ':' and therefore never matched the
    # usual `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Every match is a tuple of groups; the video id is the last one.
    entries.extend(m[-1] for m in matches)

    return entries
1442
1443 @staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1447
97665381
PH
1448 @classmethod
def extract_id(cls, url):
    """Return the video id contained in *url*.

    The URL is matched against cls._VALID_URL in verbose mode and the
    second group of the match is returned.  Raises ExtractorError when
    the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1455
1fb07d10
JG
1456 def _extract_annotations(self, video_id):
1457 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 1458 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 1459
9cafc3fd
S
1460 @staticmethod
1461 def _extract_chapters(description, duration):
1462 if not description:
1463 return None
1464 chapter_lines = re.findall(
1465 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1466 description)
1467 if not chapter_lines:
1468 return None
1469 chapters = []
1470 for next_num, (chapter_line, time_point) in enumerate(
1471 chapter_lines, start=1):
1472 start_time = parse_duration(time_point)
1473 if start_time is None:
1474 continue
39d4c1be
S
1475 if start_time > duration:
1476 break
9cafc3fd
S
1477 end_time = (duration if next_num == len(chapter_lines)
1478 else parse_duration(chapter_lines[next_num][1]))
1479 if end_time is None:
1480 continue
39d4c1be
S
1481 if end_time > duration:
1482 end_time = duration
1483 if start_time > end_time:
1484 break
9cafc3fd
S
1485 chapter_title = re.sub(
1486 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1487 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1488 chapters.append({
1489 'start_time': start_time,
1490 'end_time': end_time,
1491 'title': chapter_title,
1492 })
1493 return chapters
1494
def _real_extract(self, url):
    """Extract metadata and downloadable formats for a single video URL.

    Downloads the watch page and, when needed, the embed page and one or
    more ``get_video_info`` responses, then builds the format list from
    RTMP, ``url_encoded_fmt_stream_map``/``adaptive_fmts``, HLS or DASH
    data and scrapes the remaining metadata from the watch page.

    Returns the info dict of the video.  Two early exits return something
    else: a ``url_result`` pointing at ``ypc_vid`` when only a rental
    preview is available, and a ``playlist_result`` for multifeed videos
    unless ``noplaylist`` is set or the URL was smuggled with
    ``force_singlefeed``.

    Raises ExtractorError when no ``token`` is present in the video info,
    for rental and rtmpe videos, and when no format information is found.
    """
    url, smuggled_data = unsmuggle_url(url, {})

    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    start_time = None
    end_time = None
    parsed_url = compat_urllib_parse_urlparse(url)
    # The fragment is inspected before the query, so it takes precedence.
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Collect every distinct DASH manifest URL seen in any video info
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    is_live = None
    view_count = None

    def extract_view_count(v_info):
        return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))

    player_response = {}

    def extract_player_response(raw_response):
        # The player response is delivered as a JSON string, both in the
        # ytplayer config args and in the get_video_info query string, so
        # it has to be parsed before it can be used as a dict.
        # Returns None when it is missing or does not decode to a dict.
        pl_response = str_or_none(raw_response)
        if not pl_response:
            return None
        pl_response = self._parse_json(pl_response, video_id, fatal=False)
        if isinstance(pl_response, dict):
            return pl_response
        return None

    # Get video info
    embed_webpage = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse_urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        sts = None
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
        if ytplayer_config:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            # Rental video is not rented but preview is available (e.g.
            # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
            # https://github.com/rg3/youtube-dl/issues/10532)
            if not video_info and args.get('ypc_vid'):
                return self.url_result(
                    args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
            sts = ytplayer_config.get('sts')
            if not player_response:
                player_response = extract_player_response(
                    args.get('player_response')) or {}
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
                query = {
                    'video_id': video_id,
                    'ps': 'default',
                    'eurl': '',
                    'gl': 'US',
                    'hl': 'en',
                }
                if el:
                    query['el'] = el
                if sts:
                    query['sts'] = sts
                video_info_webpage = self._download_webpage(
                    '%s://www.youtube.com/get_video_info' % proto,
                    video_id, note=False,
                    errnote='unable to download video info webpage',
                    fatal=False, query=query)
                if not video_info_webpage:
                    continue
                get_video_info = compat_parse_qs(video_info_webpage)
                if not player_response:
                    # compat_parse_qs yields strings, so the value must be
                    # JSON-decoded; a plain isinstance(..., dict) check on
                    # the raw value would never succeed.
                    player_response = extract_player_response(
                        get_video_info.get('player_response', [None])[0]) or {}
                add_dash_mpd(get_video_info)
                if view_count is None:
                    view_count = extract_view_count(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info
                    break

    def extract_unavailable_message():
        return self._html_search_regex(
            r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
            video_webpage, 'unavailable message', default=None)

    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta(
                    'regionsAllowed', video_webpage, default=None)
                countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    msg=video_info['reason'][0], countries=countries)
            reason = video_info['reason'][0]
            if 'Invalid parameters' in reason:
                unavailable_message = extract_unavailable_message()
                if unavailable_message:
                    reason = unavailable_message
            raise ExtractorError(
                'YouTube said: %s' % reason,
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    video_details = try_get(
        player_response, lambda x: x['videoDetails'], dict) or {}

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    elif video_details.get('title'):
        # The title is read from videoDetails, so that is the dict that
        # has to be tested (not the enclosing player_response).
        video_title = video_details['title']
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # description
    description_original = video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:

        def replace_url(m):
            # Replace a shortened link with its full target; links going
            # through the /redirect endpoint are unwrapped to their 'q' URL.
            redir_url = compat_urlparse.urljoin(url, m.group(1))
            parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
            if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
                qs = compat_parse_qs(parsed_redir_url.query)
                q = qs.get('q')
                if q and q[0]:
                    return q[0]
            return redir_url

        description_original = video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                (?:title|href)="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                class="[^"]*"[^>]*>
            [^<]+\.{3}\s*
            </a>
        ''', replace_url, video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            entries = []
            feed_ids = []
            multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/rg3/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                })
                feed_ids.append(feed_data['id'][0])
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if view_count is None:
        view_count = extract_view_count(video_info)
    if view_count is None and video_details:
        view_count = int_or_none(video_details.get('viewCount'))

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)

    def _extract_filesize(media_url):
        return int_or_none(self._search_regex(
            r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        formats_spec = {}
        fmt_list = video_info.get('fmt_list', [''])[0]
        if fmt_list:
            for fmt in fmt_list.split(','):
                spec = fmt.split('/')
                if len(spec) > 1:
                    width_height = spec[1].split('x')
                    if len(width_height) == 2:
                        formats_spec[spec[0]] = {
                            'resolution': spec[1],
                            'width': int_or_none(width_height[0]),
                            'height': int_or_none(width_height[1]),
                        }
        q = qualities(['small', 'medium', 'hd720'])
        formats = []
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                # The first lookup may legitimately return None for age-gated
                # videos; json.loads(None) would raise TypeError and make the
                # fallback below unreachable.
                player_url = (
                    json.loads(jsplayer_url_json)
                    if jsplayer_url_json else None)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                encrypted_sig = url_data['s'][0]

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            dct = {
                'format_id': format_id,
                'url': url,
                'player_url': player_url,
            }
            if format_id in self._formats:
                dct.update(self._formats[format_id])
            if format_id in formats_spec:
                dct.update(formats_spec[format_id])

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

            filesize = int_or_none(url_data.get(
                'clen', [None])[0]) or _extract_filesize(url)

            quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]

            more_fields = {
                'filesize': filesize,
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'width': width,
                'height': height,
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': quality,
                'quality': q(quality),
            }
            # Only truthy values override what is already in dct
            for key, value in more_fields.items():
                if value:
                    dct[key] = value
            type_ = url_data.get('type', [None])[0]
            if type_:
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, _ = kind_ext
                    dct['ext'] = mimetype2ext(type_split[0])
                    if kind in ('audio', 'video'):
                        codecs = None
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                                break
                        if codecs:
                            dct.update(parse_codecs(codecs))
            if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            formats.append(dct)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        formats = []
        m3u8_formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', fatal=False)
        for a_format in m3u8_formats:
            itag = self._search_regex(
                r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
            if itag:
                a_format['format_id'] = itag
                if itag in self._formats:
                    dct = self._formats[itag].copy()
                    dct.update(a_format)
                    a_format = dct
            a_format['player_url'] = player_url
            # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
            formats.append(a_format)
    else:
        error_message = clean_html(video_info.get('reason', [None])[0])
        if not error_message:
            error_message = extract_unavailable_message()
        if error_message:
            raise ExtractorError(error_message, expected=True)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # uploader
    video_uploader = try_get(
        video_info, lambda x: x['author'][0],
        compat_str) or str_or_none(video_details.get('author'))
    if video_uploader:
        video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
    else:
        self._downloader.report_warning('unable to extract uploader name')

    # uploader_id
    video_uploader_id = None
    video_uploader_url = None
    mobj = re.search(
        r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
        video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group('uploader_id')
        video_uploader_url = mobj.group('uploader_url')
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    channel_id = self._html_search_meta(
        'channelId', video_webpage, 'channel id')
    channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
            video_webpage, 'upload date', default=None)
    upload_date = unified_strdate(upload_date)

    video_license = self._html_search_regex(
        r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
        video_webpage, 'license', default=None)

    m_music = re.search(
        r'''(?x)
            <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
            <ul[^>]*>\s*
            <li>(?P<title>.+?)
            by (?P<creator>.+?)
            (?:
                \(.+?\)|
                <a[^>]*
                    (?:
                        \bhref=["\']/red[^>]*>|             # drop possible
                        >\s*Listen ad-free with YouTube Red # YouTube Red ad
                    )
                .*?
            )?</li
        ''',
        video_webpage)
    if m_music:
        video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
        video_creator = clean_html(m_music.group('creator'))
    else:
        video_alt_title = video_creator = None

    def extract_meta(field):
        return self._html_search_regex(
            r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
            video_webpage, field, default=None)

    track = extract_meta('Song')
    artist = extract_meta('Artist')

    m_episode = re.search(
        r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
        video_webpage)
    if m_episode:
        series = m_episode.group('series')
        season_number = int(m_episode.group('season'))
        episode_number = int(m_episode.group('episode'))
    else:
        series = season_number = episode_number = None

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    video_tags = [
        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    if view_count is None:
        view_count = str_to_int(self._search_regex(
            r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
            'view count', default=None))

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    video_duration = try_get(
        video_info, lambda x: int_or_none(x['length_seconds'][0]))
    if not video_duration:
        video_duration = int_or_none(video_details.get('lengthSeconds'))
    if not video_duration:
        video_duration = parse_duration(self._html_search_meta(
            'duration', video_webpage, 'video duration'))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    chapters = self._extract_chapters(description_original, video_duration)

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for mpd_url in dash_mpds:
            dash_formats = {}
            try:
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s

                mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                for df in self._extract_mpd_formats(
                        mpd_url, video_id, fatal=dash_mpd_fatal,
                        formats_dict=self._formats):
                    if not df.get('filesize'):
                        df['filesize'] = _extract_filesize(df['url'])
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        w = float(stretched_m.group('w'))
        h = float(stretched_m.group('h'))
        # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
        # We will only process correct ratios.
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    self.mark_watched(video_id, video_info)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'uploader_url': video_uploader_url,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'upload_date': upload_date,
        'license': video_license,
        'creator': video_creator or artist,
        'title': video_title,
        'alt_title': video_alt_title or track,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'tags': video_tags,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'chapters': chapters,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
        'start_time': start_time,
        'end_time': end_time,
        'series': series,
        'season_number': season_number,
        'episode_number': episode_number,
        'track': track,
        'artist': artist,
    }
c5e8d7af 2157
5f6a1245 2158
8e7aad20 2159class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2160 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2161 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2162 (?:https?://)?
2163 (?:\w+\.)?
c5e8d7af 2164 (?:
c0345b82
S
2165 (?:
2166 youtube\.com|
2167 invidio\.us
2168 )
2169 /
feaa5ad7 2170 (?:
87dadd45 2171 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2172 \? (?:.*?[&;])*? (?:p|a|list)=
2173 | p/
2174 )|
2175 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2176 )
d67cc9fa 2177 (
409b9324 2178 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
5f6a1245 2179 # Top tracks, they can also include dots
d67cc9fa
JMF
2180 |(?:MC)[\w\.]*
2181 )
c5e8d7af
PH
2182 .*
2183 |
d0ba5587
S
2184 (%(playlist_id)s)
2185 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2186 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
648e6a1f 2187 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
78caa52a 2188 IE_NAME = 'youtube:playlist'
81127aa5
PH
2189 _TESTS = [{
2190 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2191 'info_dict': {
2192 'title': 'ytdl test PL',
a1cf99d0 2193 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5
PH
2194 },
2195 'playlist_count': 3,
9291475f
PH
2196 }, {
2197 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2198 'info_dict': {
acf757f4 2199 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f
PH
2200 'title': 'YDL_Empty_List',
2201 },
2202 'playlist_count': 0,
4201ba13 2203 'skip': 'This playlist is private',
9291475f
PH
2204 }, {
2205 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2206 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2207 'info_dict': {
2208 'title': '29C3: Not my department',
acf757f4 2209 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
9291475f
PH
2210 },
2211 'playlist_count': 95,
2212 }, {
2213 'note': 'issue #673',
2214 'url': 'PLBB231211A4F62143',
2215 'info_dict': {
f46a8702 2216 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2217 'id': 'PLBB231211A4F62143',
9291475f
PH
2218 },
2219 'playlist_mincount': 26,
2220 }, {
2221 'note': 'Large playlist',
2222 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2223 'info_dict': {
2224 'title': 'Uploads from Cauchemar',
acf757f4 2225 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
9291475f
PH
2226 },
2227 'playlist_mincount': 799,
2228 }, {
2229 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2230 'info_dict': {
2231 'title': 'YDL_safe_search',
acf757f4 2232 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2233 },
2234 'playlist_count': 2,
4201ba13 2235 'skip': 'This playlist is private',
ac7553d0
PH
2236 }, {
2237 'note': 'embedded',
2d3d2997 2238 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2239 'playlist_count': 4,
2240 'info_dict': {
2241 'title': 'JODA15',
acf757f4 2242 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0 2243 }
87dadd45
S
2244 }, {
2245 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2246 'playlist_mincount': 485,
2247 'info_dict': {
2248 'title': '2017 華語最新單曲 (2/24更新)',
2249 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2250 }
6b08cdf6
PH
2251 }, {
2252 'note': 'Embedded SWF player',
2d3d2997 2253 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2254 'playlist_count': 4,
2255 'info_dict': {
2256 'title': 'JODA7',
acf757f4 2257 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
6b08cdf6 2258 }
4b7df0d3
JMF
2259 }, {
2260 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2261 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2262 'info_dict': {
acf757f4
PH
2263 'title': 'Uploads from Interstellar Movie',
2264 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2265 },
481cc733 2266 'playlist_mincount': 21,
dacb3a86
S
2267 }, {
2268 # Playlist URL that does not actually serve a playlist
2269 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2270 'info_dict': {
2271 'id': 'FqZTN594JQw',
2272 'ext': 'webm',
2273 'title': "Smiley's People 01 detective, Adventure Series, Action",
2274 'uploader': 'STREEM',
2275 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2277 'upload_date': '20150526',
2278 'license': 'Standard YouTube License',
2279 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2280 'categories': ['People & Blogs'],
2281 'tags': list,
dbdaaa23 2282 'view_count': int,
dacb3a86
S
2283 'like_count': int,
2284 'dislike_count': int,
2285 },
2286 'params': {
2287 'skip_download': True,
2288 },
2289 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2290 }, {
2291 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2292 'info_dict': {
2293 'id': 'yeWKywCrFtk',
2294 'ext': 'mp4',
2295 'title': 'Small Scale Baler and Braiding Rugs',
2296 'uploader': 'Backus-Page House Museum',
2297 'uploader_id': 'backuspagemuseum',
ec85ded8 2298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733
S
2299 'upload_date': '20161008',
2300 'license': 'Standard YouTube License',
2301 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2302 'categories': ['Nonprofits & Activism'],
2303 'tags': list,
2304 'like_count': int,
2305 'dislike_count': int,
2306 },
2307 'params': {
2308 'noplaylist': True,
2309 'skip_download': True,
2310 },
feaa5ad7
S
2311 }, {
2312 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2313 'only_matching': True,
a6857510
S
2314 }, {
2315 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2316 'only_matching': True,
409b9324
S
2317 }, {
2318 # music album playlist
2319 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2320 'only_matching': True,
c0345b82
S
2321 }, {
2322 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2323 'only_matching': True,
81127aa5 2324 }]
c5e8d7af 2325
880e1c52
JMF
def _real_initialize(self):
    """Call self._login() when the extractor is initialized."""
    self._login()
2328
def _extract_mix(self, playlist_id):
    """Collect the videos of a mix by scraping successive watch pages.

    Watch pages are requested one after another, each time starting from the
    last video id collected so far, until a page contributes no id that has
    not been seen already. The title is read from the last page downloaded.

    Returns the value of self.playlist_result() for the collected ids.
    """
    # The mixes are generated from a single video:
    # the id of the playlist is just 'RD' + video_id
    current_id = playlist_id[-11:]
    # The pattern only depends on the playlist id, so it is built once.
    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected = []
    for page_num in itertools.count(1):
        watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (current_id, playlist_id)
        note = 'Downloading page {0} of Youtube mix'.format(page_num)
        webpage = self._download_webpage(watch_url, playlist_id, note)
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        unseen = [
            video_id
            for video_id in orderedSet(re.findall(mix_entry_re, webpage))
            if video_id not in collected]
        if not unseen:
            break
        collected.extend(unseen)
        current_id = collected[-1]

    entries = self._ids_to_results(collected)

    # Try the known title containers in order and keep the first non-empty
    # one; if none matches, the result of the last lookup is kept.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break

    return self.playlist_result(entries, playlist_id, clean_html(title_span))
2360
def _extract_playlist(self, playlist_id):
    """Download a playlist page and build a playlist result from it.

    Returns a (has_videos, playlist) tuple. has_videos is False only when
    no playlist title was found and self._entries() yields nothing for the
    page. playlist is the self.playlist_result() dict extended with the
    'uploader', 'uploader_id' and 'uploader_url' keys.

    Raises ExtractorError (expected) when an alert on the page says the
    playlist does not exist, is private, or that parameters are invalid.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
    alerts = re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page)
    for alert in alerts:
        alert = alert.strip()
        # Check if the playlist exists or is private
        reason_match = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if reason_match:
            reason = reason_match.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            raise ExtractorError(message + '.', expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            # Not a problem with the playlist itself, ignore silently
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of both uploader patterns: the first link inside the
    # pl-header-details element
    _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
        page, 'uploader', default=None)
    uploader_id = None
    uploader_url = None
    uploader_mobj = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
        page)
    if uploader_mobj:
        uploader_id = uploader_mobj.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, uploader_mobj.group('path'))

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        # so probe for a first entry when no title could be found.
        nothing = object()
        if next(self._entries(page, playlist_id), nothing) is nothing:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist['uploader'] = uploader
    playlist['uploader_id'] = uploader_id
    playlist['uploader_url'] = uploader_url

    return has_videos, playlist
c5e8d7af 2422
def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should be handled as a single video.

    Returns a (video_id, video) tuple:
      * (None, None) when the URL carries no video id;
      * (video_id, url_result) when the 'noplaylist' param is set;
      * (video_id, None) when a video id is present but the playlist
        should still be downloaded.
    """
    # Check if it's a video-specific URL: first the "v" query parameter,
    # then the youtu.be/<id> and /embed/<id> URL shapes.
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)

    if not video_id:
        return None, None

    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
    return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
448830ce 2437
ebf1b291
S
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from a playlist URL.

    Raises ExtractorError when the URL does not match _VALID_URL.
    """
    # Extract playlist id
    url_match = re.match(self._VALID_URL, url)
    if url_match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = url_match.group(1) or url_match.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL', 'PU')):
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if video_id and not has_videos:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist
448830ce 2462
c5e8d7af 2463
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /channel/<id> URLs. Where possible the channel is
    # delegated to its uploads playlist, otherwise the channel's videos
    # page is scraped.
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for channel_id (url is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return a playlist (or a redirect to the uploads playlist) for a channel."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: look for an app link that embeds the id
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to get the uploads playlist id
            uploads_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mobj = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mobj is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface any alert
        # message shown on the page as the error.
        nothing = object()
        if next(self._entries(channel_page, channel_id), nothing) is nothing:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2556
2557
class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for /user/<name>, /c/<name>, bare /<name> URLs and the
    # "ytuser:" keyword; extraction itself is inherited from YoutubeChannelIE.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that no other Youtube*IE in this module accepts."""
        # This extractor's regex is very permissive, so give every other
        # extractor defined in this module (module-level names that start
        # with 'Youtube' and end with 'IE') the first chance to claim the URL.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, keeping the 'user' or 'c' path kind."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit /user/ or /c/ segment default to 'user'
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2608
b05654f0 2609
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Extractor for ".../live" URLs of a user or channel.
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to a video result, else to the base URL.

        A video result is returned only when the page's og:type starts with
        'video' and its videoId meta value is an 11 character id. In every
        other case, including a failed download, the URL without the
        trailing '/live' is returned as the result.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        fallback = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(fallback)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type.startswith('video') and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(fallback)
2660
2661
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for the ".../playlists" page of a user or channel; all
    # behaviour comes from the base class, only the constants are set here.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
0c148415
S
2690
2691
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Pattern for one result link: a mandatory 11 character video id,
    # optionally followed by the title attribute of the same tag.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
2694
2695
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query string arguments appended to every search request
    # (overridden by subclasses, e.g. to change the sort order)
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link, until at least n videos have been collected, a page yields no
        videos, or there is no next page.

        Returns a playlist result holding at most n entries, with the query
        as playlist id.

        Raises ExtractorError (expected) when a downloaded page contains the
        "search-message" marker.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected. Using ">" here
            # would fetch one more page after reaching exactly n results,
            # only for that page to be thrown away by the truncation below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past n
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2744
c9ae7b95 2745
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, but with an extra query argument that
    # asks for results sorted by upload date.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2751
c9ae7b95 2752
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    # Extractor for URLs of the /results search page.
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos listed on a search results page as a playlist.

        The decoded search query is used both as the id shown while
        downloading and as the playlist title.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2773
2774
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for /show/<id> URLs; extraction is delegated to the base
    # class using the show's "/playlists" page.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the base class extraction on the show's playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2792
2793
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Shared implementation of the feed extractors.

    A subclass has to provide _FEED_NAME (used for IE_NAME and the feed URL)
    and _PLAYLIST_TITLE (used as playlist title and as the id shown while
    downloading).
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield a result for every new video id on the feed and its "load more" pages."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(found)
                if video_id not in seen_ids]
            if not new_ids:
                break
            seen_ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            more_mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not more_mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
2845
2846
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "watch later" list, i.e. the playlist with id 'WL'.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the 'WL' playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist part of the (has_videos, playlist) tuple is used
        return self._extract_playlist('WL')[1]
f459d170 2866
5f6a1245 2867
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Extractor for the favourites page; it only finds the id of the
    # favourites playlist and hands over to the playlist extractor.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Look up the favourites playlist id and delegate to 'YoutubePlaylist'."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
2878
2879
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "recommended" feed.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2885
1ed5b5c9 2886
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "subscriptions" feed.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2892
1ed5b5c9 2893
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "history" feed.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2899
2900
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution URLs that end right after a single query
    # parameter and carry no video id; extraction always fails with a hint
    # about quoting the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2948
2949
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose "v" value is only 1 to 10 characters long;
    # extraction always fails with a message naming the incomplete id.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError about the truncated id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)