]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[kaltura] Add support for multiple embeds on a webpage (closes #25523)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
351f37c0 33 extract_attributes,
c5e8d7af 34 ExtractorError,
2d30521a 35 float_or_none,
4bb4a188
PH
36 get_element_by_attribute,
37 get_element_by_id,
dd27fd17 38 int_or_none,
94278f72 39 mimetype2ext,
4bb4a188 40 orderedSet,
6310acf5 41 parse_codecs,
7c80519c 42 parse_duration,
0cb58b02 43 remove_quotes,
3995d37d 44 remove_start,
cf7e015f 45 smuggle_url,
dbdaaa23 46 str_or_none,
c93d53f5 47 str_to_int,
556dbe7f 48 try_get,
c5e8d7af
PH
49 unescapeHTML,
50 unified_strdate,
cf7e015f 51 unsmuggle_url,
81c2f20b 52 uppercase_escape,
21c340b8 53 url_or_none,
6e6bc8da 54 urlencode_postdata,
c5e8d7af
PH
55)
56
5f6a1245 57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one url_result (handled by the 'Youtube' extractor) per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report failure explicitly, as promised by the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req payload to url.

            The download is non-fatal; callers compare the result against
            False to detect a failed request.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                # (presumably an anti-hijacking prefix - confirm)
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Same URL is embedded in both the lookup and the challenge payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Report failure explicitly, as promised by the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: the conditional expression binds looser than %,
            # so without the parentheses the prefix was lost for every
            # message other than INCORRECT_ANSWER_ENTERED
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip a leading "G-" from the user-supplied code, if present
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved here; tell the user
                # to resolve it in a browser
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query."""
        # Copy so the caller's dict is never mutated; `or {}` also tolerates
        # an explicit query=None
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in when a downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 280
8377574c 281
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(). Paging stops
        when no load-more link is found or when the next chunk is blank.
        """
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if load_more is None:
                return

            max_retries = 3
            attempt = 0
            while True:
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as e:
                    transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if not transient:
                        raise
                    attempt += 1
                    if attempt > max_retries:
                        # Retry budget exhausted: propagate the last error
                        raise
                else:
                    break

            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            more_widget_html = more['load_more_widget_html']
319
061a75ed
S
320
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every (id, title) pair found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists that are updated
        in place: an unseen id is appended together with its title, while a
        repeated id only fills in a title that is still missing.
        """
        for match in re.finditer(video_re, page):
            named_groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index' - confirm intent
            if 'index' in named_groups and match.group('id') == '0':
                continue
            video_id = match.group('id')

            video_title = None
            if 'title' in named_groups:
                video_title = unescapeHTML(match.group('title'))
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            if video_id in ids_in_page:
                # Known id: only fill in a title that is still missing
                position = ids_in_page.index(video_id)
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return the zipped (video_id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
352
353
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each playlist link found in content."""
        # orderedSet presumably drops duplicates while keeping first-seen order - confirm
        playlist_ids = orderedSet(re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content))
        for playlist_id in playlist_ids:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return its playlists as a playlist result."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # Title lookup is non-fatal
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
367
368
360e1ca5 369class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 370 IE_DESC = 'YouTube.com'
cb7dfeea 371 _VALID_URL = r"""(?x)^
c5e8d7af 372 (
edb53e2d 373 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 375 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 376 (?:www\.)?pwnyoutube\.com/|
8b561bfc 377 (?:www\.)?hooktube\.com/|
f7000f3a 378 (?:www\.)?yourepeat\.com/|
e69ae5b9 379 tube\.majestyc\.net/|
ba036333 380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 381 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 384 (?:www\.)?invidious\.kabi\.tk/|
ba036333 385 (?:www\.)?invidious\.13ad\.de/|
791d2e81 386 (?:www\.)?invidious\.mastodon\.host/|
494d664e 387 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 388 (?:www\.)?invidious\.drycat\.fr/|
ba036333 389 (?:www\.)?tube\.poal\.co/|
8ae113ca 390 (?:www\.)?vid\.wxzm\.sx/|
384bf91f 391 (?:www\.)?yewtu\.be/|
494d664e 392 (?:www\.)?yt\.elukerio\.org/|
894b3826 393 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 394 (?:www\.)?invidious\.ggc-project\.de/|
395 (?:www\.)?yt\.maisputain\.ovh/|
396 (?:www\.)?invidious\.13ad\.de/|
397 (?:www\.)?invidious\.toot\.koeln/|
398 (?:www\.)?invidious\.fdn\.fr/|
399 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 400 (?:www\.)?kgg2m7yk5aybusll\.onion/|
401 (?:www\.)?qklhadlycap4cnod\.onion/|
402 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
403 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
404 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
405 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 406 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 407 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 408 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
409 (?:.*?\#/)? # handle anchor (#/) redirect urls
410 (?: # the various things that can precede the ID:
ac7553d0 411 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 412 |(?: # or the v= param in all its forms
f7000f3a 413 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 414 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 415 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
416 v=
417 )
f4b05232 418 ))
cbaed4bb
S
419 |(?:
420 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
421 vid\.plus| # or vid.plus/xxxx
422 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 423 )/
edb53e2d 424 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 425 )
c5e8d7af 426 )? # all until now is optional -> you can pass the naked ID
8963d9c2 427 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
428 (?!.*?\blist=
429 (?:
430 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
431 WL # WL are handled by the watch later IE
432 )
433 )
c5e8d7af 434 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 435 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 436 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
437 _PLAYER_INFO_RE = (
438 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
439 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
440 )
2c62dc26 441 _formats = {
c2d3cb4c 442 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
443 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
444 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
445 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
446 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
447 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
448 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 450 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 451 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
452 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
454 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
455 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
456 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 457 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 458 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
459 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 460
461
462 # 3D videos
c2d3cb4c 463 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
464 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
465 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
466 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 467 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
468 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
469 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 470
96fb5605 471 # Apple HTTP Live Streaming
11f12195 472 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 473 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
474 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
475 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
476 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
477 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 478 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
480
481 # DASH mp4 video
d23028a8
S
482 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 487 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
488 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
492 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
493 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 494
f6f1fc92 495 # Dash mp4 audio
d23028a8
S
496 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
497 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
498 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
499 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
500 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
501 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
502 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
503
504 # Dash webm
d23028a8
S
505 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
508 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
509 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
510 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
512 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
517 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 520 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
521 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
523 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
524 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
525 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
527
528 # Dash webm audio
d23028a8
S
529 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
530 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 531
0857baad 532 # Dash webm audio with opus inside
d23028a8
S
533 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
534 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
535 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 536
ce6b9a2d
PH
537 # RTMP (unnamed)
538 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
539
540 # av01 video only formats sometimes served with "unknown" codecs
541 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
542 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
543 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
544 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 545 }
19041a38 546 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 547
fd5c4aab
S
548 _GEO_BYPASS = False
549
78caa52a 550 IE_NAME = 'youtube'
2eb88d95
PH
551 _TESTS = [
552 {
2d3d2997 553 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
554 'info_dict': {
555 'id': 'BaW_jenozKc',
556 'ext': 'mp4',
557 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
558 'uploader': 'Philipp Hagemeister',
559 'uploader_id': 'phihag',
ec85ded8 560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
561 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
562 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
000b6b5a 566 'tags': ['youtube-dl'],
556dbe7f 567 'duration': 10,
dbdaaa23 568 'view_count': int,
3e7c1224
PH
569 'like_count': int,
570 'dislike_count': int,
7c80519c 571 'start_time': 1,
297a564b 572 'end_time': 9,
2eb88d95 573 }
0e853ca4 574 },
0e853ca4 575 {
2d3d2997 576 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
577 'note': 'Test generic use_cipher_signature video (#897)',
578 'info_dict': {
579 'id': 'UxxajLWwzqY',
580 'ext': 'mp4',
581 'upload_date': '20120506',
582 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 583 'alt_title': 'I Love It (feat. Charli XCX)',
5429d6a9 584 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
000b6b5a
S
585 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
586 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
587 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 588 'duration': 180,
4bc3a23e
PH
589 'uploader': 'Icona Pop',
590 'uploader_id': 'IconaPop',
ec85ded8 591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 592 'creator': 'Icona Pop',
936784b2
S
593 'track': 'I Love It (feat. Charli XCX)',
594 'artist': 'Icona Pop',
2eb88d95 595 }
c108eb73
JMF
596 },
597 {
4bc3a23e
PH
598 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
599 'note': 'Test VEVO video with age protection (#956)',
600 'info_dict': {
601 'id': '07FYdnEawAQ',
602 'ext': 'mp4',
603 'upload_date': '20130703',
4fe54c12 604 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 605 'alt_title': 'Tunnel Vision',
4fe54c12 606 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 607 'duration': 419,
4bc3a23e
PH
608 'uploader': 'justintimberlakeVEVO',
609 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 610 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 611 'creator': 'Justin Timberlake',
7e72694b 612 'track': 'Tunnel Vision',
936784b2 613 'artist': 'Justin Timberlake',
34952f09 614 'age_limit': 18,
c108eb73
JMF
615 }
616 },
fccd3771 617 {
4bc3a23e
PH
618 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
619 'note': 'Embed-only video (#1746)',
620 'info_dict': {
621 'id': 'yZIXLfi8CZQ',
622 'ext': 'mp4',
623 'upload_date': '20120608',
624 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
625 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
626 'uploader': 'SET India',
94bfcd23 627 'uploader_id': 'setindia',
ec85ded8 628 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 629 'age_limit': 18,
fccd3771
PH
630 }
631 },
11b56058 632 {
2d3d2997 633 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
634 'note': 'Use the first video ID in the URL',
635 'info_dict': {
636 'id': 'BaW_jenozKc',
637 'ext': 'mp4',
638 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
639 'uploader': 'Philipp Hagemeister',
640 'uploader_id': 'phihag',
ec85ded8 641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
642 'upload_date': '20121002',
643 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
644 'categories': ['Science & Technology'],
645 'tags': ['youtube-dl'],
556dbe7f 646 'duration': 10,
dbdaaa23 647 'view_count': int,
11b56058
PM
648 'like_count': int,
649 'dislike_count': int,
34a7de29
S
650 },
651 'params': {
652 'skip_download': True,
653 },
11b56058 654 },
dd27fd17 655 {
2d3d2997 656 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
657 'note': '256k DASH audio (format 141) via DASH manifest',
658 'info_dict': {
659 'id': 'a9LDPn-MO4I',
660 'ext': 'm4a',
661 'upload_date': '20121002',
662 'uploader_id': '8KVIDEO',
ec85ded8 663 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
664 'description': '',
665 'uploader': '8KVIDEO',
666 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 667 },
4bc3a23e
PH
668 'params': {
669 'youtube_include_dash_manifest': True,
670 'format': '141',
4919603f 671 },
de3c7fe0 672 'skip': 'format 141 not served anymore',
dd27fd17 673 },
3489b7d2
JMF
674 # DASH manifest with encrypted signature
675 {
78caa52a
PH
676 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
677 'info_dict': {
678 'id': 'IB3lcPjvWLA',
679 'ext': 'm4a',
4fe54c12
S
680 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
681 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 682 'duration': 244,
78caa52a
PH
683 'uploader': 'AfrojackVEVO',
684 'uploader_id': 'AfrojackVEVO',
685 'upload_date': '20131011',
3489b7d2 686 },
4bc3a23e 687 'params': {
78caa52a 688 'youtube_include_dash_manifest': True,
de3c7fe0 689 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
690 },
691 },
aaeb86f6
S
692 # JS player signature function name containing $
693 {
694 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
695 'info_dict': {
696 'id': 'nfWlot6h_JM',
697 'ext': 'm4a',
698 'title': 'Taylor Swift - Shake It Off',
5429d6a9 699 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
556dbe7f 700 'duration': 242,
aaeb86f6
S
701 'uploader': 'TaylorSwiftVEVO',
702 'uploader_id': 'TaylorSwiftVEVO',
703 'upload_date': '20140818',
704 },
705 'params': {
706 'youtube_include_dash_manifest': True,
de3c7fe0 707 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
708 },
709 },
aa79ac0c
PH
710 # Controversy video
711 {
712 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
713 'info_dict': {
714 'id': 'T4XJQO3qol8',
715 'ext': 'mp4',
556dbe7f 716 'duration': 219,
aa79ac0c 717 'upload_date': '20100909',
4fe54c12 718 'uploader': 'Amazing Atheist',
aa79ac0c 719 'uploader_id': 'TheAmazingAtheist',
ec85ded8 720 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
721 'title': 'Burning Everyone\'s Koran',
722 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
723 }
c522adb1
JMF
724 },
725 # Normal age-gate video (No vevo, embed allowed)
726 {
2d3d2997 727 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
728 'info_dict': {
729 'id': 'HtVdAasjOgU',
730 'ext': 'mp4',
731 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 732 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 733 'duration': 142,
c522adb1
JMF
734 'uploader': 'The Witcher',
735 'uploader_id': 'WitcherGame',
ec85ded8 736 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 737 'upload_date': '20140605',
34952f09 738 'age_limit': 18,
c522adb1
JMF
739 },
740 },
fccae2b9
S
741 # Age-gate video with encrypted signature
742 {
2d3d2997 743 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
744 'info_dict': {
745 'id': '6kLq3WMV1nU',
4fe54c12 746 'ext': 'mp4',
fccae2b9
S
747 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
748 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 749 'duration': 246,
fccae2b9
S
750 'uploader': 'LloydVEVO',
751 'uploader_id': 'LloydVEVO',
ec85ded8 752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 753 'upload_date': '20110629',
34952f09 754 'age_limit': 18,
fccae2b9
S
755 },
756 },
067aa17e 757 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 758 # YouTube Red ad is not captured for creator
774e208f
PH
759 {
760 'url': '__2ABJjxzNo',
761 'info_dict': {
762 'id': '__2ABJjxzNo',
763 'ext': 'mp4',
556dbe7f 764 'duration': 266,
774e208f
PH
765 'upload_date': '20100430',
766 'uploader_id': 'deadmau5',
ec85ded8 767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
5429d6a9 768 'creator': 'Dada Life, deadmau5',
774e208f
PH
769 'description': 'md5:12c56784b8032162bb936a5f76d55360',
770 'uploader': 'deadmau5',
771 'title': 'Deadmau5 - Some Chords (HD)',
5429d6a9 772 'alt_title': 'This Machine Kills Some Chords',
774e208f
PH
773 },
774 'expected_warnings': [
775 'DASH manifest missing',
776 ]
e52a40ab 777 },
067aa17e 778 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
779 {
780 'url': 'lqQg6PlCWgI',
781 'info_dict': {
782 'id': 'lqQg6PlCWgI',
783 'ext': 'mp4',
556dbe7f 784 'duration': 6085,
90227264 785 'upload_date': '20150827',
cbe2bd91 786 'uploader_id': 'olympic',
ec85ded8 787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 788 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 789 'uploader': 'Olympic',
cbe2bd91
PH
790 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
791 },
792 'params': {
793 'skip_download': 'requires avconv',
e52a40ab 794 }
cbe2bd91 795 },
6271f1ca
PH
796 # Non-square pixels
797 {
798 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
799 'info_dict': {
800 'id': '_b-2C3KPAM0',
801 'ext': 'mp4',
802 'stretched_ratio': 16 / 9.,
556dbe7f 803 'duration': 85,
6271f1ca
PH
804 'upload_date': '20110310',
805 'uploader_id': 'AllenMeow',
ec85ded8 806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 807 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 808 'uploader': '孫ᄋᄅ',
6271f1ca
PH
809 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
810 },
06b491eb
S
811 },
812 # url_encoded_fmt_stream_map is empty string
813 {
814 'url': 'qEJwOuvDf7I',
815 'info_dict': {
816 'id': 'qEJwOuvDf7I',
f57b7835 817 'ext': 'webm',
06b491eb
S
818 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
819 'description': '',
820 'upload_date': '20150404',
821 'uploader_id': 'spbelect',
822 'uploader': 'Наблюдатели Петербурга',
823 },
824 'params': {
825 'skip_download': 'requires avconv',
e323cf3f
S
826 },
827 'skip': 'This live event has ended.',
06b491eb 828 },
067aa17e 829 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
830 {
831 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
832 'info_dict': {
833 'id': 'FIl7x6_3R5Y',
eb6793ba 834 'ext': 'webm',
da77d856
S
835 'title': 'md5:7b81415841e02ecd4313668cde88737a',
836 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 837 'duration': 220,
da77d856
S
838 'upload_date': '20150625',
839 'uploader_id': 'dorappi2000',
ec85ded8 840 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 841 'uploader': 'dorappi2000',
eb6793ba 842 'formats': 'mincount:31',
da77d856 843 },
eb6793ba 844 'skip': 'not actual anymore',
2ee8f5d8 845 },
8a1a26ce
YCH
846 # DASH manifest with segment_list
847 {
848 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
849 'md5': '8ce563a1d667b599d21064e982ab9e31',
850 'info_dict': {
851 'id': 'CsmdDsKjzN8',
852 'ext': 'mp4',
17ee98e1 853 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
854 'uploader': 'Airtek',
855 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
856 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
857 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
858 },
859 'params': {
860 'youtube_include_dash_manifest': True,
861 'format': '135', # bestvideo
be49068d
S
862 },
863 'skip': 'This live event has ended.',
2ee8f5d8 864 },
cf7e015f
S
865 {
866 # Multifeed videos (multiple cameras), URL is for Main Camera
867 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
868 'info_dict': {
869 'id': 'jqWvoWXjCVs',
870 'title': 'teamPGP: Rocket League Noob Stream',
871 'description': 'md5:dc7872fb300e143831327f1bae3af010',
872 },
873 'playlist': [{
874 'info_dict': {
875 'id': 'jqWvoWXjCVs',
876 'ext': 'mp4',
877 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
878 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 879 'duration': 7335,
cf7e015f
S
880 'upload_date': '20150721',
881 'uploader': 'Beer Games Beer',
882 'uploader_id': 'beergamesbeer',
ec85ded8 883 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 884 'license': 'Standard YouTube License',
cf7e015f
S
885 },
886 }, {
887 'info_dict': {
888 'id': '6h8e8xoXJzg',
889 'ext': 'mp4',
890 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
891 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 892 'duration': 7337,
cf7e015f
S
893 'upload_date': '20150721',
894 'uploader': 'Beer Games Beer',
895 'uploader_id': 'beergamesbeer',
ec85ded8 896 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 897 'license': 'Standard YouTube License',
cf7e015f
S
898 },
899 }, {
900 'info_dict': {
901 'id': 'PUOgX5z9xZw',
902 'ext': 'mp4',
903 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
904 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 905 'duration': 7337,
cf7e015f
S
906 'upload_date': '20150721',
907 'uploader': 'Beer Games Beer',
908 'uploader_id': 'beergamesbeer',
ec85ded8 909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 910 'license': 'Standard YouTube License',
cf7e015f
S
911 },
912 }, {
913 'info_dict': {
914 'id': 'teuwxikvS5k',
915 'ext': 'mp4',
916 'title': 'teamPGP: Rocket League Noob Stream (zim)',
917 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 918 'duration': 7334,
cf7e015f
S
919 'upload_date': '20150721',
920 'uploader': 'Beer Games Beer',
921 'uploader_id': 'beergamesbeer',
ec85ded8 922 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 923 'license': 'Standard YouTube License',
cf7e015f
S
924 },
925 }],
926 'params': {
927 'skip_download': True,
928 },
4fe54c12 929 'skip': 'This video is not available.',
cbaed4bb 930 },
f9f49d87 931 {
067aa17e 932 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
933 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
934 'info_dict': {
935 'id': 'gVfLd0zydlo',
936 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
937 },
938 'playlist_count': 2,
be49068d 939 'skip': 'Not multifeed anymore',
f9f49d87 940 },
cbaed4bb 941 {
2d3d2997 942 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 943 'only_matching': True,
0e49d9a6 944 },
6d4fc66b 945 {
2d3d2997 946 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
947 'only_matching': True,
948 },
0e49d9a6 949 {
067aa17e 950 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 951 # Also tests cut-off URL expansion in video description (see
067aa17e
S
952 # https://github.com/ytdl-org/youtube-dl/issues/1892,
953 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
954 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
955 'info_dict': {
956 'id': 'lsguqyKfVQg',
957 'ext': 'mp4',
958 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 959 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 960 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 961 'duration': 133,
0e49d9a6
LL
962 'upload_date': '20151119',
963 'uploader_id': 'IronSoulElf',
ec85ded8 964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 965 'uploader': 'IronSoulElf',
eb6793ba
S
966 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
967 'track': 'Dark Walk - Position Music',
968 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 969 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
970 },
971 'params': {
972 'skip_download': True,
973 },
974 },
61f92af1 975 {
067aa17e 976 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
977 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
978 'only_matching': True,
979 },
313dfc45
LL
980 {
981 # Video with yt:stretch=17:0
982 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
983 'info_dict': {
984 'id': 'Q39EVAstoRM',
985 'ext': 'mp4',
986 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
987 'description': 'md5:ee18a25c350637c8faff806845bddee9',
988 'upload_date': '20151107',
989 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
990 'uploader': 'CH GAMER DROID',
991 },
992 'params': {
993 'skip_download': True,
994 },
be49068d 995 'skip': 'This video does not exist.',
313dfc45 996 },
7caf9830
S
997 {
998 # Video licensed under Creative Commons
999 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1000 'info_dict': {
1001 'id': 'M4gD1WSo5mA',
1002 'ext': 'mp4',
1003 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1004 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1005 'duration': 721,
7caf9830
S
1006 'upload_date': '20150127',
1007 'uploader_id': 'BerkmanCenter',
ec85ded8 1008 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1009 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1011 },
1012 'params': {
1013 'skip_download': True,
1014 },
1015 },
fd050249
S
1016 {
1017 # Channel-like uploader_url
1018 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1019 'info_dict': {
1020 'id': 'eQcmzGIKrzg',
1021 'ext': 'mp4',
1022 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1023 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1024 'duration': 4060,
fd050249 1025 'upload_date': '20151119',
eb6793ba 1026 'uploader': 'Bernie Sanders',
fd050249 1027 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1028 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1029 'license': 'Creative Commons Attribution license (reuse allowed)',
1030 },
1031 'params': {
1032 'skip_download': True,
1033 },
1034 },
040ac686
S
1035 {
1036 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1037 'only_matching': True,
7f29cf54
S
1038 },
1039 {
067aa17e 1040 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1041 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1042 'only_matching': True,
6496ccb4
S
1043 },
1044 {
1045 # Rental video preview
1046 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1047 'info_dict': {
1048 'id': 'uGpuVWrhIzE',
1049 'ext': 'mp4',
1050 'title': 'Piku - Trailer',
1051 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1052 'upload_date': '20150811',
1053 'uploader': 'FlixMatrix',
1054 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1055 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1056 'license': 'Standard YouTube License',
1057 },
1058 'params': {
1059 'skip_download': True,
1060 },
eb6793ba 1061 'skip': 'This video is not available.',
022a5d66 1062 },
12afdc2a
S
1063 {
1064 # YouTube Red video with episode data
1065 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1066 'info_dict': {
1067 'id': 'iqKdEhx-dD4',
1068 'ext': 'mp4',
1069 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1070 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1071 'duration': 2085,
12afdc2a
S
1072 'upload_date': '20170118',
1073 'uploader': 'Vsauce',
1074 'uploader_id': 'Vsauce',
1075 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1076 'series': 'Mind Field',
1077 'season_number': 1,
1078 'episode_number': 1,
1079 },
1080 'params': {
1081 'skip_download': True,
1082 },
1083 'expected_warnings': [
1084 'Skipping DASH manifest',
1085 ],
1086 },
c7121fa7
S
1087 {
1088 # The following content has been identified by the YouTube community
1089 # as inappropriate or offensive to some audiences.
1090 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1091 'info_dict': {
1092 'id': '6SJNVb0GnPI',
1093 'ext': 'mp4',
1094 'title': 'Race Differences in Intelligence',
1095 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1096 'duration': 965,
1097 'upload_date': '20140124',
1098 'uploader': 'New Century Foundation',
1099 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 },
022a5d66
S
1106 {
1107 # itag 212
1108 'url': '1t24XAntNCY',
1109 'only_matching': True,
fd5c4aab
S
1110 },
1111 {
1112 # geo restricted to JP
1113 'url': 'sJL6WA-aGkQ',
1114 'only_matching': True,
1115 },
d0ba5587
S
1116 {
1117 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1118 'only_matching': True,
1119 },
cd5a74a2
S
1120 {
1121 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1122 'only_matching': True,
1123 },
825cd268
RA
1124 {
1125 # DRM protected
1126 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1127 'only_matching': True,
4fe54c12
S
1128 },
1129 {
1130 # Video with unsupported adaptive stream type formats
1131 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1132 'info_dict': {
1133 'id': 'Z4Vy8R84T1U',
1134 'ext': 'mp4',
1135 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1136 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1137 'duration': 433,
1138 'upload_date': '20130923',
1139 'uploader': 'Amelia Putri Harwita',
1140 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1142 'formats': 'maxcount:10',
1143 },
1144 'params': {
1145 'skip_download': True,
1146 'youtube_include_dash_manifest': False,
1147 },
5429d6a9 1148 'skip': 'not actual anymore',
5caabd3c 1149 },
1150 {
822b9d9c 1151 # Youtube Music Auto-generated description
5caabd3c 1152 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1153 'info_dict': {
1154 'id': 'MgNrAu2pzNs',
1155 'ext': 'mp4',
1156 'title': 'Voyeur Girl',
1157 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1158 'upload_date': '20190312',
5429d6a9
S
1159 'uploader': 'Stephen - Topic',
1160 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1161 'artist': 'Stephen',
1162 'track': 'Voyeur Girl',
1163 'album': 'it\'s too much love to know my dear',
1164 'release_date': '20190313',
1165 'release_year': 2019,
1166 },
1167 'params': {
1168 'skip_download': True,
1169 },
1170 },
1171 {
822b9d9c 1172 # Youtube Music Auto-generated description
5caabd3c 1173 # Retrieve 'artist' field from 'Artist:' in video description
1174 # when it is present on youtube music video
5caabd3c 1175 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1176 'info_dict': {
1177 'id': 'k0jLE7tTwjY',
1178 'ext': 'mp4',
1179 'title': 'Latch Feat. Sam Smith',
1180 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1181 'upload_date': '20150110',
1182 'uploader': 'Various Artists - Topic',
1183 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1184 'artist': 'Disclosure',
1185 'track': 'Latch Feat. Sam Smith',
1186 'album': 'Latch Featuring Sam Smith',
1187 'release_date': '20121008',
1188 'release_year': 2012,
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
1194 {
822b9d9c 1195 # Youtube Music Auto-generated description
5caabd3c 1196 # handle multiple artists on youtube music video
1197 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1198 'info_dict': {
1199 'id': '74qn0eJSjpA',
1200 'ext': 'mp4',
1201 'title': 'Eastside',
1202 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1203 'upload_date': '20180710',
1204 'uploader': 'Benny Blanco - Topic',
1205 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1206 'artist': 'benny blanco, Halsey, Khalid',
1207 'track': 'Eastside',
1208 'album': 'Eastside',
1209 'release_date': '20180713',
1210 'release_year': 2018,
1211 },
1212 'params': {
1213 'skip_download': True,
1214 },
1215 },
1216 {
822b9d9c 1217 # Youtube Music Auto-generated description
5caabd3c 1218 # handle youtube music video with release_year and no release_date
1219 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1220 'info_dict': {
1221 'id': '-hcAI0g-f5M',
1222 'ext': 'mp4',
1223 'title': 'Put It On Me',
5429d6a9 1224 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
5caabd3c 1225 'upload_date': '20180426',
1226 'uploader': 'Matt Maeson - Topic',
1227 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1228 'artist': 'Matt Maeson',
1229 'track': 'Put It On Me',
1230 'album': 'The Hearse',
1231 'release_date': None,
1232 'release_year': 2018,
1233 },
1234 'params': {
1235 'skip_download': True,
1236 },
1237 },
66b48727
RA
1238 {
1239 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1240 'only_matching': True,
1241 },
011e75e6
S
1242 {
1243 # invalid -> valid video id redirection
1244 'url': 'DJztXj2GPfl',
1245 'info_dict': {
1246 'id': 'DJztXj2GPfk',
1247 'ext': 'mp4',
1248 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1249 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1250 'upload_date': '20090125',
1251 'uploader': 'Prochorowka',
1252 'uploader_id': 'Prochorowka',
1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1254 'artist': 'Panjabi MC',
1255 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1256 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1257 },
1258 'params': {
1259 'skip_download': True,
1260 },
1261 }
2eb88d95
PH
1262 ]
1263
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and its per-instance player cache.

    All positional and keyword arguments are handed to the parent
    initialiser untouched.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, which keys it by
    # (player_url, signature cache id) and stores signature functions.
    self._player_cache = dict()
e0df6211 1267
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1271
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1275
def report_unavailable_format(self, video_id, format):
    """Report that the given format is not available for the video."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1279
def report_rtmp_download(self):
    """Announce that the download will go through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1283
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string describing the shape of a signature.

    The signature is split on dots and the length of every part is
    converted with compat_str; the converted lengths are joined with
    dots again.
    """
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1287
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ('ext', 'id') regex groups identifying a player URL.

    The patterns in cls._PLAYER_INFO_RE are tried in order and the first
    one found in player_url supplies both groups.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: searching stops at the first pattern that matches
    attempts = (
        re.search(player_re, player_url)
        for player_re in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('ext'), id_m.group('id')
1297
1298 def _extract_signature_function(self, video_id, player_url, example_sig):
1299 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1300
c4417ddb 1301 # Read from filesystem cache
60064c53
PH
1302 func_id = '%s_%s_%s' % (
1303 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1304 assert os.path.basename(func_id) == func_id
a0e07d31 1305
69ea8ca4 1306 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1307 if cache_spec is not None:
78caa52a 1308 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1309
6d1a55a5
PH
1310 download_note = (
1311 'Downloading player %s' % player_url
1312 if self._downloader.params.get('verbose') else
1313 'Downloading %s player %s' % (player_type, player_id)
1314 )
e0df6211
PH
1315 if player_type == 'js':
1316 code = self._download_webpage(
1317 player_url, video_id,
6d1a55a5 1318 note=download_note,
69ea8ca4 1319 errnote='Download of %s failed' % player_url)
83799698 1320 res = self._parse_sig_js(code)
c4417ddb 1321 elif player_type == 'swf':
e0df6211
PH
1322 urlh = self._request_webpage(
1323 player_url, video_id,
6d1a55a5 1324 note=download_note,
69ea8ca4 1325 errnote='Download of %s failed' % player_url)
e0df6211 1326 code = urlh.read()
83799698 1327 res = self._parse_sig_swf(code)
e0df6211
PH
1328 else:
1329 assert False, 'Invalid player type %r' % player_type
1330
785521bf
PH
1331 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1332 cache_res = res(test_string)
1333 cache_spec = [ord(c) for c in cache_res]
83799698 1334
69ea8ca4 1335 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1336 return res
1337
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is run on a probe string as long as example_sig whose
    characters encode their own positions; the resulting index list is
    rendered as a sum of indexing and slicing expressions on ``s``,
    guarded by a check on the lengths of the dot-separated parts of the
    signature.  The snippet is written with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices there are no neighbours to pair
        # up below, and the final yield would hit an unbound loop
        # variable.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # repr() of a tuple keeps the trailing comma for a single element:
    # '(5,)' rather than '(5)', which would be a plain int that the
    # generated tuple comparison could never equal.
    signature_id_tuple = repr(
        tuple(len(p) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1376
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return the signature function found in JS player code.

    The name of the initial signature function is located with a list
    of regular expressions, the function is extracted from jscode with
    JSInterpreter, and a one-argument wrapper around it is returned.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)

    def run(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run
1397
def _parse_sig_swf(self, file_contents):
    """Return the signature function found in an SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is
    extracted with SWFInterpreter and wrapped so that it takes a single
    argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run
1404
83799698 1405 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1406 """Turn the encrypted s field into a working signature"""
6b37f0be 1407
c8bf86d5 1408 if player_url is None:
69ea8ca4 1409 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1410
69ea8ca4 1411 if player_url.startswith('//'):
78caa52a 1412 player_url = 'https:' + player_url
3c90cc8b
S
1413 elif not re.match(r'https?://', player_url):
1414 player_url = compat_urlparse.urljoin(
1415 'https://www.youtube.com', player_url)
c8bf86d5 1416 try:
62af3a0e 1417 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1418 if player_id not in self._player_cache:
1419 func = self._extract_signature_function(
60064c53 1420 video_id, player_url, s
c8bf86d5
PH
1421 )
1422 self._player_cache[player_id] = func
1423 func = self._player_cache[player_id]
1424 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1425 self._print_sig_code(func, s)
c8bf86d5
PH
1426 return func(s)
1427 except Exception as e:
1428 tb = traceback.format_exc()
1429 raise ExtractorError(
78caa52a 1430 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1431
360e1ca5 1432 def _get_subtitles(self, video_id, webpage):
de7f3446 1433 try:
60e47a26 1434 subs_doc = self._download_xml(
38c2e5b8 1435 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1436 video_id, note=False)
1437 except ExtractorError as err:
9b9c5355 1438 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1439 return {}
de7f3446
JMF
1440
1441 sub_lang_list = {}
60e47a26
JMF
1442 for track in subs_doc.findall('track'):
1443 lang = track.attrib['lang_code']
7e660ac1
LD
1444 if lang in sub_lang_list:
1445 continue
360e1ca5 1446 sub_formats = []
23d17e4b 1447 for ext in self._SUBTITLE_FORMATS:
15707c7e 1448 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1449 'lang': lang,
1450 'v': video_id,
1451 'fmt': ext,
1452 'name': track.attrib['name'].encode('utf-8'),
1453 })
1454 sub_formats.append({
1455 'url': 'https://www.youtube.com/api/timedtext?' + params,
1456 'ext': ext,
1457 })
1458 sub_lang_list[lang] = sub_formats
de7f3446 1459 if not sub_lang_list:
69ea8ca4 1460 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1461 return {}
1462 return sub_lang_list
1463
a72778d3
S
1464 def _get_ytplayer_config(self, video_id, webpage):
1465 patterns = (
526b3b07
S
1466 # User data may contain arbitrary character sequences that may affect
1467 # JSON extraction with regex, e.g. when '};' is contained the second
1468 # regex won't capture the whole JSON. Yet working around by trying more
1469 # concrete regex first keeping in mind proper quoted string handling
1470 # to be implemented in future that will replace this workaround (see
067aa17e
S
1471 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1472 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1473 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1474 r';ytplayer\.config\s*=\s*({.+?});',
1475 )
1476 config = self._search_regex(
1477 patterns, webpage, 'ytplayer.config', default=None)
1478 if config:
1479 return self._parse_json(
1480 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1481
def _get_automatic_captions(self, video_id, webpage):
    """Build the table of automatic captions for a video.

    The watch page is passed in so the caption URL can be located without
    downloading the page again.

    Returns a dict mapping a language code to a list of
    ``{'url': ..., 'ext': ...}`` dicts, one per entry of
    ``self._SUBTITLE_FORMATS``. An empty dict is returned, after a
    warning, when the player config is missing, when the track listing has
    no ``track`` element, or when a lookup raises ``KeyError``,
    ``IndexError`` or ``ExtractorError``.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    config = self._get_ytplayer_config(video_id, webpage)
    not_found_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not config:
        self._downloader.report_warning(not_found_msg)
        return {}

    def translated_tracks(track_url, lang_codes):
        # Re-issue track_url once per (language, format) pair, overriding
        # only the 'tlang' and 'fmt' query parameters.
        url_parts = compat_urllib_parse_urlparse(track_url)
        query = compat_parse_qs(url_parts.query)
        tracks = {}
        for code in lang_codes:
            variants = []
            for ext in self._SUBTITLE_FORMATS:
                query['tlang'] = [code]
                query['fmt'] = [ext]
                variants.append({
                    'url': compat_urlparse.urlunparse(url_parts._replace(
                        query=compat_urllib_parse_urlencode(query, True))),
                    'ext': ext,
                })
            tracks[code] = variants
        return tracks

    try:
        args = config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # Fetch the listing of available tracks and translation targets
            listing = self._download_xml(
                tts_url + '&' + compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                }), video_id)
            source_track = listing.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            source_lang = source_track.attrib['lang_code']
            source_kind = source_track.attrib.get('kind', '')

            tracks = {}
            for target in listing.findall('target'):
                target_lang = target.attrib['lang_code']
                tracks[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse_urlencode({
                        'lang': source_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': source_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return tracks

        # Caption layout in use as of 22.06.2017
        response = args.get('player_response')
        if response and isinstance(response, compat_str):
            response = self._parse_json(response, video_id, fatal=False)
            if response:
                renderer = response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                codes = [
                    lang.get('languageCode')
                    for lang in renderer['translationLanguages']]
                return translated_tracks(
                    base_url, [code for code in codes if code])

        # Some videos provide caption_tracks and
        # caption_translation_languages instead of ttsurl
        # (e.g. 20LmZk1hakA). No longer used as of 22.06.2017.
        legacy_tracks = args['caption_tracks']
        legacy_langs = args['caption_translation_languages']
        legacy_url = compat_parse_qs(legacy_tracks.split(',')[0])['u'][0]
        codes = []
        for entry in legacy_langs.split(','):
            entry_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(entry))
            code = entry_qs.get('lc', [None])[0]
            if code:
                codes.append(code)
        return translated_tracks(legacy_url, codes)
    # An ExtractorError can be raised by the download step if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(not_found_msg)
        return {}
1583
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-tracking URL (reported as 'Marking watched').

    The URL is taken from *player_response*
    (``playbackTracking.videostatsPlaybackUrl.baseUrl``) or, failing that,
    from ``video_info['videostats_playback_base_url'][0]``. The method
    returns without doing anything when neither yields a usable URL.
    The request is made with ``fatal=False``.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
        video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick each of the 16 characters uniformly. random.randint() includes
    # its upper bound, so ``randint(0, 256) & 63`` draws from 257 values and
    # slightly favours index 0; random.choice() has no such skew.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1609
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced by *webpage*.

    Three detectors run in order and their results are concatenated:

    * player URLs (``youtube.com`` / ``youtube-nocookie.com`` with an
      ``embed``, ``v`` or ``p`` path) quoted after an ``<iframe>``/
      ``<embed>`` ``src=``, ``data-video-url=``, ``<object ... data=``,
      ``embedSWF(`` or ``new SWFObject(``;
    * the ``data-youtube-id`` attribute value of ``class="lazyYT"``
      elements;
    * the ``data-video_id`` attribute value of ``<div>`` elements whose
      class contains ``yvii_single_video_player``.

    The first two kinds are passed through ``unescapeHTML``. Returns a
    list, which is empty when nothing matches.
    """
    # Embedded YouTube player.
    # ``embedSWF\(\s*`` matches the opening of an ``embedSWF("...")`` call;
    # the former ``embedSWF\(?:\s*`` required a literal ':' before the
    # quoted URL and so never matched such a call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the last group is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1641
@staticmethod
def _extract_url(webpage):
    """Return the first result of ``YoutubeIE._extract_urls(webpage)``.

    Returns ``None`` when that list is empty.
    """
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1646
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return capture group 2 of ``cls._VALID_URL`` matched against *url*.

    The pattern is applied with ``re.VERBOSE`` and anchored at the start
    of *url* (``re.match``).

    Raises:
        ExtractorError: if *url* does not match ``cls._VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1654
9cafc3fd
S
@staticmethod
def _extract_chapters(description, duration):
    """Derive a chapter list from timestamp links in an HTML description.

    A chapter line is a ``<br />``-delimited segment of *description*
    containing an ``<a>`` whose ``onclick`` starts with
    ``yt.www.watch.player.seekTo`` and whose text is a ``m:s`` or
    ``h:m:s`` timestamp. Each chapter ends where the next one starts; the
    last one ends at *duration*.

    Returns ``None`` when *description* is empty or has no chapter lines,
    otherwise a list of dicts with ``start_time``, ``end_time`` and
    ``title`` keys.
    """
    if not description:
        return None
    found = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not found:
        return None
    last_index = len(found) - 1
    result = []
    for index, (line_html, timestamp) in enumerate(found):
        begin = parse_duration(timestamp)
        if begin is None:
            continue
        # A chapter starting beyond the end of the video stops processing
        if begin > duration:
            break
        if index == last_index:
            finish = duration
        else:
            finish = parse_duration(found[index + 1][1])
        if finish is None:
            continue
        # Clamp the end of the chapter to the video length
        if finish > duration:
            finish = duration
        if begin > finish:
            break
        # Drop the timestamp link itself, then tidy the remaining text
        title = re.sub(r'<a[^>]+>[^<]+</a>', '', line_html).strip(' \t-')
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': re.sub(r'\s+', ' ', title),
        })
    return result
1689
c5e8d7af 1690 def _real_extract(self, url):
cf7e015f
S
1691 url, smuggled_data = unsmuggle_url(url, {})
1692
7e8c0af0 1693 proto = (
78caa52a
PH
1694 'http' if self._downloader.params.get('prefer_insecure', False)
1695 else 'https')
7e8c0af0 1696
7c80519c 1697 start_time = None
297a564b 1698 end_time = None
7c80519c
JMF
1699 parsed_url = compat_urllib_parse_urlparse(url)
1700 for component in [parsed_url.fragment, parsed_url.query]:
1701 query = compat_parse_qs(component)
297a564b 1702 if start_time is None and 't' in query:
7c80519c 1703 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1704 if start_time is None and 'start' in query:
1705 start_time = parse_duration(query['start'][0])
297a564b
JMF
1706 if end_time is None and 'end' in query:
1707 end_time = parse_duration(query['end'][0])
7c80519c 1708
c5e8d7af
PH
1709 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1710 mobj = re.search(self._NEXT_URL_RE, url)
1711 if mobj:
7fd002c0 1712 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1713 video_id = self.extract_id(url)
c5e8d7af
PH
1714
1715 # Get video webpage
aa79ac0c 1716 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1717 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1718
1719 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1720 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1721
1722 # Attempt to extract SWF player URL
e0df6211 1723 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1724 if mobj is not None:
1725 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1726 else:
1727 player_url = None
1728
d8d24a92
S
1729 dash_mpds = []
1730
1731 def add_dash_mpd(video_info):
1732 dash_mpd = video_info.get('dashmpd')
1733 if dash_mpd and dash_mpd[0] not in dash_mpds:
1734 dash_mpds.append(dash_mpd[0])
1735
561b456e
S
1736 def add_dash_mpd_pr(pl_response):
1737 dash_mpd = url_or_none(try_get(
1738 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1739 compat_str))
1740 if dash_mpd and dash_mpd not in dash_mpds:
1741 dash_mpds.append(dash_mpd)
1742
c7121fa7
S
1743 is_live = None
1744 view_count = None
1745
1746 def extract_view_count(v_info):
1747 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1748
c2d125d9
S
1749 def extract_player_response(player_response, video_id):
1750 pl_response = str_or_none(player_response)
1751 if not pl_response:
1752 return
1753 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1754 if isinstance(pl_response, dict):
1755 add_dash_mpd_pr(pl_response)
1756 return pl_response
1757
dbdaaa23
S
1758 player_response = {}
1759
c5e8d7af 1760 # Get video info
43ebf77d 1761 video_info = {}
6449cd80 1762 embed_webpage = None
c108eb73 1763 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1764 age_gate = True
1765 # We simulate the access to the video from www.youtube.com/v/{video_id}
1766 # this can be viewed without login into Youtube
beb95e77
CL
1767 url = proto + '://www.youtube.com/embed/%s' % video_id
1768 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1769 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1770 'video_id': video_id,
1771 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1772 'sts': self._search_regex(
beb95e77 1773 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1774 })
7e8c0af0 1775 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
d332ec72
S
1776 try:
1777 video_info_webpage = self._download_webpage(
1778 video_info_url, video_id,
1779 note='Refetching age-gated info webpage',
1780 errnote='unable to download video info webpage')
1781 except ExtractorError:
1782 video_info_webpage = None
1783 if video_info_webpage:
1784 video_info = compat_parse_qs(video_info_webpage)
1785 pl_response = video_info.get('player_response', [None])[0]
1786 player_response = extract_player_response(pl_response, video_id)
1787 add_dash_mpd(video_info)
1788 view_count = extract_view_count(video_info)
c108eb73
JMF
1789 else:
1790 age_gate = False
d8d24a92 1791 # Try looking directly into the video webpage
a72778d3
S
1792 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1793 if ytplayer_config:
4e62ebe2 1794 args = ytplayer_config['args']
4c76aa06 1795 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1796 # Convert to the same format returned by compat_parse_qs
1797 video_info = dict((k, [v]) for k, v in args.items())
1798 add_dash_mpd(video_info)
6496ccb4
S
1799 # Rental video is not rented but preview is available (e.g.
1800 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1801 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1802 if not video_info and args.get('ypc_vid'):
1803 return self.url_result(
1804 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1805 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1806 is_live = True
dbdaaa23 1807 if not player_response:
c2d125d9 1808 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1809 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1810 add_dash_mpd_pr(player_response)
bbb7c3f7
YCH
1811
1812 def extract_unavailable_message():
0add33ab
S
1813 messages = []
1814 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1815 msg = self._html_search_regex(
1816 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1817 video_webpage, 'unavailable %s' % kind, default=None)
1818 if msg:
1819 messages.append(msg)
1820 if messages:
1821 return '\n'.join(messages)
bbb7c3f7 1822
f93abcf1 1823 if not video_info and not player_response:
15be3eb5
RA
1824 unavailable_message = extract_unavailable_message()
1825 if not unavailable_message:
1826 unavailable_message = 'Unable to extract video data'
1827 raise ExtractorError(
1828 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1829
f93abcf1
S
1830 if not isinstance(video_info, dict):
1831 video_info = {}
1832
dbdaaa23
S
1833 video_details = try_get(
1834 player_response, lambda x: x['videoDetails'], dict) or {}
1835
8dbf751a
RA
1836 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1837 if not video_title:
cf7e015f
S
1838 self._downloader.report_warning('Unable to extract video title')
1839 video_title = '_'
1840
9cafc3fd 1841 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1842 if video_description:
fa4bc6e7
RA
1843
1844 def replace_url(m):
1845 redir_url = compat_urlparse.urljoin(url, m.group(1))
1846 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1847 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1848 qs = compat_parse_qs(parsed_redir_url.query)
1849 q = qs.get('q')
1850 if q and q[0]:
1851 return q[0]
1852 return redir_url
1853
9cafc3fd 1854 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1855 <a\s+
25cb7a0e 1856 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1857 (?:title|href)="([^"]+)"\s+
25cb7a0e 1858 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1859 class="[^"]*"[^>]*>
23f13e97 1860 [^<]+\.{3}\s*
cf7e015f 1861 </a>
fa4bc6e7 1862 ''', replace_url, video_description)
cf7e015f
S
1863 video_description = clean_html(video_description)
1864 else:
8dbf751a 1865 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1866
8fe10494 1867 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1868 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1869 multifeed_metadata_list = try_get(
1870 player_response,
1871 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1872 compat_str) or try_get(
1873 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1874 if multifeed_metadata_list:
1875 entries = []
1876 feed_ids = []
1877 for feed in multifeed_metadata_list.split(','):
1878 # Unquote should take place before split on comma (,) since textual
1879 # fields may contain comma as well (see
067aa17e 1880 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1881 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1882
1883 def feed_entry(name):
1884 return try_get(feed_data, lambda x: x[name][0], compat_str)
1885
1886 feed_id = feed_entry('id')
1887 if not feed_id:
1888 continue
1889 feed_title = feed_entry('title')
1890 title = video_title
1891 if feed_title:
1892 title += ' (%s)' % feed_title
8fe10494
S
1893 entries.append({
1894 '_type': 'url_transparent',
1895 'ie_key': 'Youtube',
1896 'url': smuggle_url(
1897 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1898 {'force_singlefeed': True}),
6b09401b 1899 'title': title,
8fe10494 1900 })
6b09401b 1901 feed_ids.append(feed_id)
8fe10494
S
1902 self.to_screen(
1903 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1904 % (', '.join(feed_ids), video_id))
1905 return self.playlist_result(entries, video_id, video_title, video_description)
1906 else:
1907 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1908
c7121fa7 1909 if view_count is None:
1c9c8de2 1910 view_count = extract_view_count(video_info)
dbdaaa23
S
1911 if view_count is None and video_details:
1912 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1913
27019dbb 1914 if is_live is None:
898238e9 1915 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1916
c5e8d7af
PH
1917 # Check for "rental" videos
1918 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1919 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1920
c63ca0ee
S
1921 def _extract_filesize(media_url):
1922 return int_or_none(self._search_regex(
1923 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1924
bf1317d2
S
1925 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1926 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1927
c5e8d7af
PH
1928 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1929 self.report_rtmp_download()
dd27fd17
PH
1930 formats = [{
1931 'format_id': '_rtmp',
1932 'protocol': 'rtmp',
1933 'url': video_info['conn'][0],
1934 'player_url': player_url,
1935 }]
bf1317d2 1936 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1937 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1938 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1939 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1940 formats = []
3318832e 1941 formats_spec = {}
82156fdb 1942 fmt_list = video_info.get('fmt_list', [''])[0]
1943 if fmt_list:
1944 for fmt in fmt_list.split(','):
1945 spec = fmt.split('/')
3318832e 1946 if len(spec) > 1:
1947 width_height = spec[1].split('x')
1948 if len(width_height) == 2:
1949 formats_spec[spec[0]] = {
1950 'resolution': spec[1],
1951 'width': int_or_none(width_height[0]),
1952 'height': int_or_none(width_height[1]),
1953 }
bf1317d2
S
1954 for fmt in streaming_formats:
1955 itag = str_or_none(fmt.get('itag'))
1956 if not itag:
201e9eaa 1957 continue
bf1317d2
S
1958 quality = fmt.get('quality')
1959 quality_label = fmt.get('qualityLabel') or quality
1960 formats_spec[itag] = {
1961 'asr': int_or_none(fmt.get('audioSampleRate')),
1962 'filesize': int_or_none(fmt.get('contentLength')),
1963 'format_note': quality_label,
1964 'fps': int_or_none(fmt.get('fps')),
1965 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1966 # bitrate for itag 43 is always 2147483647
1967 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1968 'width': int_or_none(fmt.get('width')),
1969 }
1970
1971 for fmt in streaming_formats:
00eb865b 1972 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
1973 continue
1974 url = url_or_none(fmt.get('url'))
1975
1976 if not url:
fa3db383 1977 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
1978 if not cipher:
1979 continue
1980 url_data = compat_parse_qs(cipher)
1981 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1982 if not url:
1983 continue
1984 else:
1985 cipher = None
1986 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1987
2f483bc1
S
1988 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1989 # Unsupported FORMAT_STREAM_TYPE_OTF
1990 if stream_type == 3:
1991 continue
6449cd80 1992
bf1317d2
S
1993 format_id = fmt.get('itag') or url_data['itag'][0]
1994 if not format_id:
1995 continue
1996 format_id = compat_str(format_id)
a49eccdf 1997
bf1317d2
S
1998 if cipher:
1999 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2000 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2001 jsplayer_url_json = self._search_regex(
2002 ASSETS_RE,
2003 embed_webpage if age_gate else video_webpage,
2004 'JS player URL (1)', default=None)
2005 if not jsplayer_url_json and not age_gate:
2006 # We need the embed website after all
2007 if embed_webpage is None:
2008 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2009 embed_webpage = self._download_webpage(
2010 embed_url, video_id, 'Downloading embed webpage')
2011 jsplayer_url_json = self._search_regex(
2012 ASSETS_RE, embed_webpage, 'JS player URL')
2013
2014 player_url = json.loads(jsplayer_url_json)
cf010131 2015 if player_url is None:
bf1317d2
S
2016 player_url_json = self._search_regex(
2017 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2018 video_webpage, 'age gate player URL')
2019 player_url = json.loads(player_url_json)
2020
2021 if 'sig' in url_data:
2022 url += '&signature=' + url_data['sig'][0]
2023 elif 's' in url_data:
2024 encrypted_sig = url_data['s'][0]
2025
2026 if self._downloader.params.get('verbose'):
2027 if player_url is None:
bf1317d2 2028 player_desc = 'unknown'
cf010131 2029 else:
e40c758c
S
2030 player_type, player_version = self._extract_player_info(player_url)
2031 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2032 parts_sizes = self._signature_cache_id(encrypted_sig)
2033 self.to_screen('{%s} signature length %s, %s' %
2034 (format_id, parts_sizes, player_desc))
2035
2036 signature = self._decrypt_signature(
2037 encrypted_sig, video_id, player_url, age_gate)
2038 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2039 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2040 if 'ratebypass' not in url:
2041 url += '&ratebypass=yes'
c9afb51c 2042
94278f72
YCH
2043 dct = {
2044 'format_id': format_id,
2045 'url': url,
2046 'player_url': player_url,
2047 }
2048 if format_id in self._formats:
2049 dct.update(self._formats[format_id])
3318832e 2050 if format_id in formats_spec:
2051 dct.update(formats_spec[format_id])
94278f72 2052
aabc2be6 2053 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2054 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2055 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2056 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2057 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2058
bf1317d2
S
2059 if width is None:
2060 width = int_or_none(fmt.get('width'))
2061 if height is None:
2062 height = int_or_none(fmt.get('height'))
2063
c63ca0ee
S
2064 filesize = int_or_none(url_data.get(
2065 'clen', [None])[0]) or _extract_filesize(url)
2066
bf1317d2
S
2067 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2068 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2069
4878759f
S
2070 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2071 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2072 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2073
94278f72 2074 more_fields = {
c63ca0ee 2075 'filesize': filesize,
bf1317d2 2076 'tbr': tbr,
c9afb51c
AH
2077 'width': width,
2078 'height': height,
bf1317d2
S
2079 'fps': fps,
2080 'format_note': quality_label or quality,
c9afb51c 2081 }
94278f72
YCH
2082 for key, value in more_fields.items():
2083 if value:
2084 dct[key] = value
bf1317d2 2085 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2086 if type_:
2087 type_split = type_.split(';')
2088 kind_ext = type_split[0].split('/')
2089 if len(kind_ext) == 2:
94278f72
YCH
2090 kind, _ = kind_ext
2091 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2092 if kind in ('audio', 'video'):
2093 codecs = None
2094 for mobj in re.finditer(
2095 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2096 if mobj.group('key') == 'codecs':
2097 codecs = mobj.group('val')
2098 break
2099 if codecs:
6310acf5 2100 dct.update(parse_codecs(codecs))
e4a60912
S
2101 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2102 dct['downloader_options'] = {
2103 # Youtube throttles chunks >~10M
2104 'http_chunk_size': 10485760,
2105 }
aabc2be6 2106 formats.append(dct)
c5e8d7af 2107 else:
c3e54389
S
2108 manifest_url = (
2109 url_or_none(try_get(
2110 player_response,
2111 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2112 compat_str))
2113 or url_or_none(try_get(
c3e54389
S
2114 video_info, lambda x: x['hlsvp'][0], compat_str)))
2115 if manifest_url:
2116 formats = []
2117 m3u8_formats = self._extract_m3u8_formats(
2118 manifest_url, video_id, 'mp4', fatal=False)
2119 for a_format in m3u8_formats:
2120 itag = self._search_regex(
2121 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2122 if itag:
2123 a_format['format_id'] = itag
2124 if itag in self._formats:
2125 dct = self._formats[itag].copy()
2126 dct.update(a_format)
2127 a_format = dct
2128 a_format['player_url'] = player_url
2129 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2130 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2131 formats.append(a_format)
2132 else:
13577349 2133 error_message = extract_unavailable_message()
c3e54389 2134 if not error_message:
13577349
S
2135 error_message = clean_html(try_get(
2136 player_response, lambda x: x['playabilityStatus']['reason'],
2137 compat_str))
2138 if not error_message:
2139 error_message = clean_html(
2140 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2141 if error_message:
2142 raise ExtractorError(error_message, expected=True)
2143 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2144
7e72694b 2145 # uploader
dbdaaa23
S
2146 video_uploader = try_get(
2147 video_info, lambda x: x['author'][0],
2148 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2149 if video_uploader:
2150 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2151 else:
2152 self._downloader.report_warning('unable to extract uploader name')
2153
2154 # uploader_id
2155 video_uploader_id = None
2156 video_uploader_url = None
2157 mobj = re.search(
2158 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2159 video_webpage)
2160 if mobj is not None:
2161 video_uploader_id = mobj.group('uploader_id')
2162 video_uploader_url = mobj.group('uploader_url')
2163 else:
2164 self._downloader.report_warning('unable to extract uploader nickname')
2165
b45a9e69 2166 channel_id = (
3089bc74
S
2167 str_or_none(video_details.get('channelId'))
2168 or self._html_search_meta(
2169 'channelId', video_webpage, 'channel id', default=None)
2170 or self._search_regex(
b45a9e69 2171 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2172 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2173 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2174
7e72694b
S
2175 # thumbnail image
2176 # We try first to get a high quality image:
2177 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2178 video_webpage, re.DOTALL)
2179 if m_thumb is not None:
2180 video_thumbnail = m_thumb.group(1)
2181 elif 'thumbnail_url' not in video_info:
2182 self._downloader.report_warning('unable to extract video thumbnail')
2183 video_thumbnail = None
2184 else: # don't panic if we can't find it
2185 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2186
2187 # upload date
2188 upload_date = self._html_search_meta(
2189 'datePublished', video_webpage, 'upload date', default=None)
2190 if not upload_date:
2191 upload_date = self._search_regex(
2192 [r'(?s)id="eow-date.*?>(.*?)</span>',
2193 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2194 video_webpage, 'upload date', default=None)
2195 upload_date = unified_strdate(upload_date)
2196
2197 video_license = self._html_search_regex(
2198 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2199 video_webpage, 'license', default=None)
2200
2201 m_music = re.search(
2202 r'''(?x)
2203 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2204 <ul[^>]*>\s*
2205 <li>(?P<title>.+?)
2206 by (?P<creator>.+?)
2207 (?:
2208 \(.+?\)|
2209 <a[^>]*
2210 (?:
2211 \bhref=["\']/red[^>]*>| # drop possible
2212 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2213 )
2214 .*?
2215 )?</li
2216 ''',
2217 video_webpage)
2218 if m_music:
2219 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2220 video_creator = clean_html(m_music.group('creator'))
2221 else:
2222 video_alt_title = video_creator = None
2223
2224 def extract_meta(field):
2225 return self._html_search_regex(
2226 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2227 video_webpage, field, default=None)
2228
2229 track = extract_meta('Song')
2230 artist = extract_meta('Artist')
92bc97d3 2231 album = extract_meta('Album')
822b9d9c
RA
2232
2233 # Youtube Music Auto-generated description
92bc97d3 2234 release_date = release_year = None
822b9d9c
RA
2235 if video_description:
2236 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2237 if mobj:
2238 if not track:
2239 track = mobj.group('track').strip()
2240 if not artist:
2241 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2242 if not album:
2243 album = mobj.group('album'.strip())
822b9d9c
RA
2244 release_year = mobj.group('release_year')
2245 release_date = mobj.group('release_date')
2246 if release_date:
2247 release_date = release_date.replace('-', '')
2248 if not release_year:
2249 release_year = int(release_date[:4])
2250 if release_year:
2251 release_year = int(release_year)
7e72694b
S
2252
2253 m_episode = re.search(
2254 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2255 video_webpage)
2256 if m_episode:
c2dd2dc0 2257 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2258 season_number = int(m_episode.group('season'))
2259 episode_number = int(m_episode.group('episode'))
2260 else:
2261 series = season_number = episode_number = None
2262
2263 m_cat_container = self._search_regex(
2264 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2265 video_webpage, 'categories', default=None)
2266 if m_cat_container:
2267 category = self._html_search_regex(
2268 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2269 default=None)
2270 video_categories = None if category is None else [category]
2271 else:
2272 video_categories = None
2273
2274 video_tags = [
2275 unescapeHTML(m.group('content'))
2276 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2277
2278 def _extract_count(count_name):
2279 return str_to_int(self._search_regex(
2280 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2281 % re.escape(count_name),
2282 video_webpage, count_name, default=None))
2283
2284 like_count = _extract_count('like')
2285 dislike_count = _extract_count('dislike')
2286
dbdaaa23
S
2287 if view_count is None:
2288 view_count = str_to_int(self._search_regex(
2289 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2290 'view count', default=None))
2291
bf3c9326
S
2292 average_rating = (
2293 float_or_none(video_details.get('averageRating'))
2294 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2295
7e72694b
S
2296 # subtitles
2297 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2298 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2299
2300 video_duration = try_get(
2301 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2302 if not video_duration:
2303 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2304 if not video_duration:
2305 video_duration = parse_duration(self._html_search_meta(
2306 'duration', video_webpage, 'video duration'))
2307
2308 # annotations
2309 video_annotations = None
2310 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2311 xsrf_token = self._search_regex(
2312 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2313 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2314 invideo_url = try_get(
2315 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2316 if xsrf_token and invideo_url:
2317 xsrf_field_name = self._search_regex(
2318 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2319 video_webpage, 'xsrf field name',
2320 group='xsrf_field_name', default='session_token')
2321 video_annotations = self._download_webpage(
2322 self._proto_relative_url(invideo_url),
2323 video_id, note='Downloading annotations',
2324 errnote='Unable to download video annotations', fatal=False,
2325 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2326
2327 chapters = self._extract_chapters(description_original, video_duration)
2328
dd27fd17 2329 # Look for the DASH manifest
203fb43f 2330 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2331 dash_mpd_fatal = True
8ff648e4 2332 for mpd_url in dash_mpds:
d8d24a92 2333 dash_formats = {}
774e208f 2334 try:
05d0d131
YCH
2335 def decrypt_sig(mobj):
2336 s = mobj.group(1)
2337 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2338 return '/signature/%s' % dec_s
2339
8ff648e4 2340 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2341
8ff648e4 2342 for df in self._extract_mpd_formats(
2343 mpd_url, video_id, fatal=dash_mpd_fatal,
2344 formats_dict=self._formats):
c63ca0ee
S
2345 if not df.get('filesize'):
2346 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2347 # Do not overwrite DASH format found in some previous DASH manifest
2348 if df['format_id'] not in dash_formats:
2349 dash_formats[df['format_id']] = df
77c6fb5b
S
2350 # Additional DASH manifests may end up in HTTP Error 403 therefore
2351 # allow them to fail without bug report message if we already have
2352 # some DASH manifest succeeded. This is temporary workaround to reduce
2353 # burst of bug reports until we figure out the reason and whether it
2354 # can be fixed at all.
2355 dash_mpd_fatal = False
774e208f
PH
2356 except (ExtractorError, KeyError) as e:
2357 self.report_warning(
2358 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2359 if dash_formats:
04b3b3df
JMF
2360 # Remove the formats we found through non-DASH, they
2361 # contain less info and it can be wrong, because we use
2362 # fixed values (for example the resolution). See
067aa17e 2363 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2364 # example.
d80265cc 2365 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2366 formats.extend(dash_formats.values())
d80044c2 2367
6271f1ca
PH
2368 # Check for malformed aspect ratio
2369 stretched_m = re.search(
2370 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2371 video_webpage)
2372 if stretched_m:
313dfc45
LL
2373 w = float(stretched_m.group('w'))
2374 h = float(stretched_m.group('h'))
5faf9fed
S
2375 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2376 # We will only process correct ratios.
313dfc45 2377 if w > 0 and h > 0:
41f24c32 2378 ratio = w / h
313dfc45
LL
2379 for f in formats:
2380 if f.get('vcodec') != 'none':
2381 f['stretched_ratio'] = ratio
6271f1ca 2382
026fbedc 2383 if not formats:
43ebf77d
S
2384 if 'reason' in video_info:
2385 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2386 regions_allowed = self._html_search_meta(
2387 'regionsAllowed', video_webpage, default=None)
2388 countries = regions_allowed.split(',') if regions_allowed else None
2389 self.raise_geo_restricted(
2390 msg=video_info['reason'][0], countries=countries)
2391 reason = video_info['reason'][0]
2392 if 'Invalid parameters' in reason:
2393 unavailable_message = extract_unavailable_message()
2394 if unavailable_message:
2395 reason = unavailable_message
2396 raise ExtractorError(
2397 'YouTube said: %s' % reason,
2398 expected=True, video_id=video_id)
2399 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2400 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2401
4bcc7bd1 2402 self._sort_formats(formats)
4ea3be0a 2403
21c340b8 2404 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2405
4ea3be0a 2406 return {
8bcc8756
JW
2407 'id': video_id,
2408 'uploader': video_uploader,
2409 'uploader_id': video_uploader_id,
fd050249 2410 'uploader_url': video_uploader_url,
dd4c4492
S
2411 'channel_id': channel_id,
2412 'channel_url': channel_url,
8bcc8756 2413 'upload_date': upload_date,
7caf9830 2414 'license': video_license,
936784b2 2415 'creator': video_creator or artist,
8bcc8756 2416 'title': video_title,
936784b2 2417 'alt_title': video_alt_title or track,
8bcc8756
JW
2418 'thumbnail': video_thumbnail,
2419 'description': video_description,
2420 'categories': video_categories,
000b6b5a 2421 'tags': video_tags,
8bcc8756 2422 'subtitles': video_subtitles,
360e1ca5 2423 'automatic_captions': automatic_captions,
8bcc8756
JW
2424 'duration': video_duration,
2425 'age_limit': 18 if age_gate else 0,
2426 'annotations': video_annotations,
9cafc3fd 2427 'chapters': chapters,
7e8c0af0 2428 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2429 'view_count': view_count,
4ea3be0a 2430 'like_count': like_count,
2431 'dislike_count': dislike_count,
bf3c9326 2432 'average_rating': average_rating,
8bcc8756 2433 'formats': formats,
2fe1ff85 2434 'is_live': is_live,
7c80519c 2435 'start_time': start_time,
297a564b 2436 'end_time': end_time,
12afdc2a
S
2437 'series': series,
2438 'season_number': season_number,
2439 'episode_number': episode_number,
936784b2
S
2440 'track': track,
2441 'artist': artist,
5caabd3c 2442 'album': album,
2443 'release_date': release_date,
2444 'release_year': release_year,
4ea3be0a 2445 }
c5e8d7af 2446
5f6a1245 2447
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs and bare playlist ids.

    Mixes (ids starting with ``RD``, ``UL`` or ``PU``) are crawled through the
    watch page; every other playlist is read from its ``/playlist?list=`` page.
    """

    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id found in a URL, group 2 a bare playlist id.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs from a playlist page.

        Tags carrying a ``data-video-id`` attribute are read first; the
        regex-based fallbacks then add to the same two lists through
        ``extract_videos_from_page_impl``.

        Returns the zipped ids and titles.
        """
        video_ids = []
        video_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            tag_attrs = extract_attributes(tag)
            found_id = tag_attrs['data-video-id']
            found_title = unescapeHTML(tag_attrs.get('data-title'))
            # Only strip non-empty titles; None and '' are stored as they are
            video_ids.append(found_id)
            video_titles.append(found_title.strip() if found_title else found_title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, video_ids, video_titles)

        return zip(video_ids, video_titles)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix by walking its watch pages.

        The mixes are generated from a single video; the id of the playlist is
        just 'RD' + video_id. Each watch page is requested with the last id
        collected so far, and crawling stops once a page contributes no id
        that has not been seen already.
        """
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            webpage = self._download_webpage(
                'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
                playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh_ids = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in collected_ids]
            if not fresh_ids:
                break
            collected_ids.extend(fresh_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Take the first class name that yields a non-empty element from the
        # last downloaded page; otherwise keep whatever the last lookup gave.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when the page has no title and yields no entries.

        Raises ExtractorError (expected) when an alert on the page reports
        that the playlist does not exist, is private, or that the parameters
        are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                hint = ', use --username or --netrc to access it' if 'private' in reason else ''
                raise ExtractorError(
                    'This playlist %s' % reason + hint + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        uploader_base = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_base,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_base,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether a URL that also names a video should yield only it.

        Returns ``(video_id, result)``: ``(None, None)`` when the URL carries
        no video id, ``(video_id, url_result)`` when the ``noplaylist`` param
        is set, and ``(video_id, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve a playlist URL to a single video, a mix or a playlist."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, single_video = self._check_download_just_video(url, playlist_id)
        if single_video:
            return single_video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if not has_videos and video_id:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)
        return playlist
448830ce 2809
c5e8d7af 2810
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs.

    When a channel id starting with ``UC`` can be read from the channel page,
    extraction is handed to the playlist extractor with the matching ``UU``
    list; otherwise the videos page itself is scraped.
    """

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for ``channel_id``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos as a playlist."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app-link meta tags instead
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            uploads_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id),
                'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        is_autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if is_autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            page_videos = self.extract_videos_from_page(channel_page)
            return self.playlist_result(
                [self.url_result(
                    vid, 'Youtube', video_id=vid, video_title=vtitle)
                 for vid, vtitle in page_videos],
                channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: report the page's alert message, if any
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2910
2911
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` and ``ytuser:`` URLs."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other Youtube*IE extractor in this module does.

        This extractor's regex is too permissive on its own and would also
        match URLs that belong to the other YouTube extractors, so they are
        consulted first.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the ``user``/``c`` path kind of ``url``.

        When the URL has neither prefix, ``user`` is assumed.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2966
b05654f0 2967
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""

    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video named by the live page, else defer to the base URL.

        The video result is used only when the page's og:type starts with
        ``video`` and its ``videoId`` meta value is an 11-character video id.
        In every other case (including a failed download) the URL without the
        trailing ``/live`` is returned as a URL result.
        """
        channel_id, base_url = re.match(self._VALID_URL, url).group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        names_a_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if names_a_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3018
3019
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Declarative extractor: only the URL pattern, names and tests live here.
    IE_DESC = 'YouTube.com user/channel playlists'
    # Matches the "playlists" tab of a /user/<id> or /channel/<id> page
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3049
3050
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for the search extractors; only overrides ``_VIDEO_RE``."""
    # Captures the 11-character video ID of a /watch link as ``id`` and,
    # when a title="..." attribute follows inside the same tag, its value
    # as ``title`` (the title part is optional).
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3053
3054
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` key."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the first results request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another by following the
        "Next" link until at least ``n`` entries have been collected, a
        page yields no entries, or no "Next" link is found.

        :param query: the search terms.
        :param n: maximum number of entries to return; may be
            ``float('inf')`` (the value of ``_MAX_RESULTS``).
        :returns: a playlist result titled ``query`` with at most ``n``
            entries.
        :raises ExtractorError: (expected) when a page contains the
            "search-message" marker, i.e. there are no video results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # ">=" rather than ">": once exactly n entries are collected
            # another page would only be downloaded to be thrown away.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Keep the guard: n may be float('inf'), which is not a valid slice
        # bound, so the slice must only run when truncation is really needed.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3103
c9ae7b95 3104
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of ``YoutubeSearchIE`` that adds a sort parameter to the query."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results request by the inherited _get_n_results.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3110
c9ae7b95 3111
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for literal search-result page URLs (``/results?...``)."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The ``query`` group is the still URL-encoded value of the
    # ``search_query`` or ``q`` parameter.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page and return its entries as a playlist.

        The decoded search terms serve both as the identifier passed to the
        download helper and as the playlist title.
        """
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3132
3133
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs.

    Delegates to the inherited playlists extraction using the show's
    ``/playlists`` page.
    """
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Run the parent extraction against the show's playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3151
3152
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    ``_FEED_NAME`` is used both in ``IE_NAME`` and in the feed URL
    (``https://www.youtube.com/feed/<_FEED_NAME>``); ``_PLAYLIST_TITLE`` is
    used as the title of the resulting playlist and as the identifier
    passed to the download helpers.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video linked from the feed.

        :param page: HTML of the first feed page.

        Further portions are fetched by following the
        ``data-uix-load-more-href`` link for as long as one is present.
        Iteration also stops as soon as a portion contributes no video ID
        that has not been seen before.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # set membership keeps the dedup check O(1) per ID
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist result."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3204
3205
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist ID ``WL``)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single video if one is selected, else the WL playlist.

        NOTE(review): the exact conditions under which
        ``_check_download_just_video`` returns a video are defined in the
        parent class - confirm there.
        """
        _, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _, watch_later = self._extract_playlist('WL')
        return watch_later
f459d170 3225
5f6a1245 3226
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor resolving the favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist ID and delegate to YoutubePlaylist.

        The ID is the text following the first ``list=`` on the
        ``my_favorites`` page, up to the next ``"`` or ``&``.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
15870e90
PH
3237
3238
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/recommended``."""
    # Consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3244
1ed5b5c9 3245
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/subscriptions``."""
    # The shortcut accepted by _VALID_URL is ":ytsubs" (with the leading
    # colon), so the description spells it that way.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3251
1ed5b5c9 3252
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/history``."""
    # Consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3258
3259
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that carry no video ID and explain the likely cause.

    Matching never leads to an extraction: ``_real_extract`` always raises
    an expected ``ExtractorError`` advising the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: a watch URL whose query string is empty or
    # consists of exactly one of the listed parameters, or an
    # attribution_link URL with only an "a" parameter.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected error with quoting advice."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(advice, expected=True)
772fd5cc
PH
3307
3308
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is shorter than a full video ID.

    ``_VALID_URL`` accepts 1 to 10 ID characters at the very end of the
    URL; ``_real_extract`` always raises an expected ``ExtractorError``.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected error naming the incomplete ID and URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)