]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
[skip travis] mention trim-file-name
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
351f37c0 33 extract_attributes,
c5e8d7af 34 ExtractorError,
2d30521a 35 float_or_none,
4bb4a188
PH
36 get_element_by_attribute,
37 get_element_by_id,
dd27fd17 38 int_or_none,
94278f72 39 mimetype2ext,
4bb4a188 40 orderedSet,
6310acf5 41 parse_codecs,
b84071c0 42 parse_count,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Extra headers sent with JSON page requests (see users of this attribute)
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (includes hl=en)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list of url_result entries, one per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload and return the parsed JSON response (non-fatal download).
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Shared by the lookup and the challenge request payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is parenthesised so that the prefix is kept for
            # every message ('%' binds tighter than 'if ... else').
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be given with a leading 'G-'; only the rest is sent
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Call the parent implementation with disable_polymer=true added to the query."""
        # Work on a copy so the caller's query dict is not mutated
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in, if a downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        # The result is deliberately ignored: a failed login only produces
        # warnings (or an ExtractorError raised from inside _login).
        self._login()
c5e8d7af 286
8377574c 287
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from page and from every following "Load more" page.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched from the data-uix-load-more-href link of the current
        "Load more" widget until no such link (or no content) is left.
        """
        max_retries = 3
        widget_html = page
        chunk_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(chunk_html):
                yield entry

            more_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if not more_link:
                break

            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        'https://www.youtube.com/%s' % more_link.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                except ExtractorError as err:
                    is_transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    if is_transient and attempt < max_retries:
                        continue
                    raise
                else:
                    break

            chunk_html = more['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
326
061a75ed
S
327
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a url_result for every (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids/titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists updated in place:
        unseen ids are appended together with their title (possibly None),
        and an id seen before only gets its missing title filled in.
        """
        for match in re.finditer(video_re, page):
            groups = match.groupdict()
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in groups and video_id == '0':
                continue

            if 'title' in groups:
                video_title = unescapeHTML(groups['title'])
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            try:
                known_at = ids_in_page.index(video_id)
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                if video_title and not titles_in_page[known_at]:
                    titles_in_page[known_at] = video_title

    def extract_videos_from_page(self, page):
        """Return zipped (id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
359
360
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a YoutubePlaylist url_result for each distinct playlist link in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist of the entries found on it."""
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        # The title lookup is non-fatal (fatal=False)
        list_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, list_title)
0c148415
S
374
375
360e1ca5 376class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 377 IE_DESC = 'YouTube.com'
cb7dfeea 378 _VALID_URL = r"""(?x)^
c5e8d7af 379 (
edb53e2d 380 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 381 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 382 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 383 (?:www\.)?pwnyoutube\.com/|
8b561bfc 384 (?:www\.)?hooktube\.com/|
f7000f3a 385 (?:www\.)?yourepeat\.com/|
e69ae5b9 386 tube\.majestyc\.net/|
ba036333 387 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 388 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 389 (?:(?:www|no)\.)?invidiou\.sh/|
390 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 391 (?:www\.)?invidious\.kabi\.tk/|
ba036333 392 (?:www\.)?invidious\.13ad\.de/|
791d2e81 393 (?:www\.)?invidious\.mastodon\.host/|
494d664e 394 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 395 (?:www\.)?invidious\.drycat\.fr/|
ba036333 396 (?:www\.)?tube\.poal\.co/|
8ae113ca 397 (?:www\.)?vid\.wxzm\.sx/|
384bf91f 398 (?:www\.)?yewtu\.be/|
494d664e 399 (?:www\.)?yt\.elukerio\.org/|
894b3826 400 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 401 (?:www\.)?invidious\.ggc-project\.de/|
402 (?:www\.)?yt\.maisputain\.ovh/|
403 (?:www\.)?invidious\.13ad\.de/|
404 (?:www\.)?invidious\.toot\.koeln/|
405 (?:www\.)?invidious\.fdn\.fr/|
406 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 407 (?:www\.)?kgg2m7yk5aybusll\.onion/|
408 (?:www\.)?qklhadlycap4cnod\.onion/|
409 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
410 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
411 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
412 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 413 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 414 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 415 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
416 (?:.*?\#/)? # handle anchor (#/) redirect urls
417 (?: # the various things that can precede the ID:
ac7553d0 418 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 419 |(?: # or the v= param in all its forms
f7000f3a 420 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 421 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 422 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
423 v=
424 )
f4b05232 425 ))
cbaed4bb
S
426 |(?:
427 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
428 vid\.plus| # or vid.plus/xxxx
429 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 430 )/
edb53e2d 431 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 432 )
c5e8d7af 433 )? # all until now is optional -> you can pass the naked ID
8963d9c2 434 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
435 (?!.*?\blist=
436 (?:
437 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
438 WL # WL are handled by the watch later IE
439 )
440 )
c5e8d7af 441 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 442 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 443 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
444 _PLAYER_INFO_RE = (
445 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
446 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
447 )
2c62dc26 448 _formats = {
c2d3cb4c 449 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
450 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
451 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
452 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
453 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
454 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
455 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 457 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 458 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
459 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
460 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
461 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
462 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
463 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 464 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 465 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 467
468
469 # 3D videos
c2d3cb4c 470 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
471 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
472 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
473 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 474 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
475 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
476 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 477
96fb5605 478 # Apple HTTP Live Streaming
11f12195 479 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 480 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
481 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
482 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
483 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
484 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 485 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
486 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
487
488 # DASH mp4 video
d23028a8
S
489 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 494 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
495 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
497 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
498 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
499 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
500 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 501
f6f1fc92 502 # Dash mp4 audio
d23028a8
S
503 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
504 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
505 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
506 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
507 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
508 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
509 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
510
511 # Dash webm
d23028a8
S
512 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
518 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
519 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 527 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
528 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
529 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
531 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
534
535 # Dash webm audio
d23028a8
S
536 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
537 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 538
0857baad 539 # Dash webm audio with opus inside
d23028a8
S
540 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
541 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
542 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 543
ce6b9a2d
PH
544 # RTMP (unnamed)
545 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
546
547 # av01 video only formats sometimes served with "unknown" codecs
548 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 552 }
84da5d84 553 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 554
fd5c4aab
S
555 _GEO_BYPASS = False
556
78caa52a 557 IE_NAME = 'youtube'
2eb88d95
PH
558 _TESTS = [
559 {
2d3d2997 560 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
561 'info_dict': {
562 'id': 'BaW_jenozKc',
563 'ext': 'mp4',
3867038a 564 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
565 'uploader': 'Philipp Hagemeister',
566 'uploader_id': 'phihag',
ec85ded8 567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
568 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
569 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 570 'upload_date': '20121002',
3867038a 571 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 572 'categories': ['Science & Technology'],
3867038a 573 'tags': ['youtube-dl'],
556dbe7f 574 'duration': 10,
dbdaaa23 575 'view_count': int,
3e7c1224
PH
576 'like_count': int,
577 'dislike_count': int,
7c80519c 578 'start_time': 1,
297a564b 579 'end_time': 9,
2eb88d95 580 }
0e853ca4 581 },
0e853ca4 582 {
2d3d2997 583 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
584 'note': 'Test generic use_cipher_signature video (#897)',
585 'info_dict': {
586 'id': 'UxxajLWwzqY',
587 'ext': 'mp4',
588 'upload_date': '20120506',
589 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 590 'alt_title': 'I Love It (feat. Charli XCX)',
5429d6a9 591 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
000b6b5a
S
592 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
593 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
594 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 595 'duration': 180,
4bc3a23e
PH
596 'uploader': 'Icona Pop',
597 'uploader_id': 'IconaPop',
ec85ded8 598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 599 'creator': 'Icona Pop',
936784b2
S
600 'track': 'I Love It (feat. Charli XCX)',
601 'artist': 'Icona Pop',
2eb88d95 602 }
c108eb73
JMF
603 },
604 {
4bc3a23e
PH
605 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
606 'note': 'Test VEVO video with age protection (#956)',
607 'info_dict': {
608 'id': '07FYdnEawAQ',
609 'ext': 'mp4',
610 'upload_date': '20130703',
4fe54c12 611 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 612 'alt_title': 'Tunnel Vision',
4fe54c12 613 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 614 'duration': 419,
4bc3a23e
PH
615 'uploader': 'justintimberlakeVEVO',
616 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 618 'creator': 'Justin Timberlake',
7e72694b 619 'track': 'Tunnel Vision',
936784b2 620 'artist': 'Justin Timberlake',
34952f09 621 'age_limit': 18,
c108eb73
JMF
622 }
623 },
fccd3771 624 {
4bc3a23e
PH
625 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
626 'note': 'Embed-only video (#1746)',
627 'info_dict': {
628 'id': 'yZIXLfi8CZQ',
629 'ext': 'mp4',
630 'upload_date': '20120608',
631 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
632 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
633 'uploader': 'SET India',
94bfcd23 634 'uploader_id': 'setindia',
ec85ded8 635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 636 'age_limit': 18,
fccd3771
PH
637 }
638 },
11b56058 639 {
2d3d2997 640 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
641 'note': 'Use the first video ID in the URL',
642 'info_dict': {
643 'id': 'BaW_jenozKc',
644 'ext': 'mp4',
3867038a 645 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
646 'uploader': 'Philipp Hagemeister',
647 'uploader_id': 'phihag',
ec85ded8 648 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 649 'upload_date': '20121002',
3867038a 650 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 651 'categories': ['Science & Technology'],
3867038a 652 'tags': ['youtube-dl'],
556dbe7f 653 'duration': 10,
dbdaaa23 654 'view_count': int,
11b56058
PM
655 'like_count': int,
656 'dislike_count': int,
34a7de29
S
657 },
658 'params': {
659 'skip_download': True,
660 },
11b56058 661 },
dd27fd17 662 {
2d3d2997 663 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
664 'note': '256k DASH audio (format 141) via DASH manifest',
665 'info_dict': {
666 'id': 'a9LDPn-MO4I',
667 'ext': 'm4a',
668 'upload_date': '20121002',
669 'uploader_id': '8KVIDEO',
ec85ded8 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
671 'description': '',
672 'uploader': '8KVIDEO',
673 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 674 },
4bc3a23e
PH
675 'params': {
676 'youtube_include_dash_manifest': True,
677 'format': '141',
4919603f 678 },
de3c7fe0 679 'skip': 'format 141 not served anymore',
dd27fd17 680 },
3489b7d2
JMF
681 # DASH manifest with encrypted signature
682 {
78caa52a
PH
683 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
684 'info_dict': {
685 'id': 'IB3lcPjvWLA',
686 'ext': 'm4a',
4fe54c12
S
687 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
688 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 689 'duration': 244,
78caa52a
PH
690 'uploader': 'AfrojackVEVO',
691 'uploader_id': 'AfrojackVEVO',
692 'upload_date': '20131011',
3489b7d2 693 },
4bc3a23e 694 'params': {
78caa52a 695 'youtube_include_dash_manifest': True,
de3c7fe0 696 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
697 },
698 },
aaeb86f6
S
699 # JS player signature function name containing $
700 {
701 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
702 'info_dict': {
703 'id': 'nfWlot6h_JM',
704 'ext': 'm4a',
705 'title': 'Taylor Swift - Shake It Off',
5429d6a9 706 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
556dbe7f 707 'duration': 242,
aaeb86f6
S
708 'uploader': 'TaylorSwiftVEVO',
709 'uploader_id': 'TaylorSwiftVEVO',
710 'upload_date': '20140818',
711 },
712 'params': {
713 'youtube_include_dash_manifest': True,
de3c7fe0 714 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
715 },
716 },
aa79ac0c
PH
717 # Controversy video
718 {
719 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
720 'info_dict': {
721 'id': 'T4XJQO3qol8',
722 'ext': 'mp4',
556dbe7f 723 'duration': 219,
aa79ac0c 724 'upload_date': '20100909',
4fe54c12 725 'uploader': 'Amazing Atheist',
aa79ac0c 726 'uploader_id': 'TheAmazingAtheist',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
728 'title': 'Burning Everyone\'s Koran',
729 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
730 }
c522adb1
JMF
731 },
732 # Normal age-gate video (No vevo, embed allowed)
733 {
2d3d2997 734 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
735 'info_dict': {
736 'id': 'HtVdAasjOgU',
737 'ext': 'mp4',
738 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 739 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 740 'duration': 142,
c522adb1
JMF
741 'uploader': 'The Witcher',
742 'uploader_id': 'WitcherGame',
ec85ded8 743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 744 'upload_date': '20140605',
34952f09 745 'age_limit': 18,
c522adb1
JMF
746 },
747 },
fccae2b9
S
748 # Age-gate video with encrypted signature
749 {
2d3d2997 750 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
751 'info_dict': {
752 'id': '6kLq3WMV1nU',
4fe54c12 753 'ext': 'mp4',
fccae2b9
S
754 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
755 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 756 'duration': 246,
fccae2b9
S
757 'uploader': 'LloydVEVO',
758 'uploader_id': 'LloydVEVO',
ec85ded8 759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 760 'upload_date': '20110629',
34952f09 761 'age_limit': 18,
fccae2b9
S
762 },
763 },
067aa17e 764 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 765 # YouTube Red ad is not captured for creator
774e208f
PH
766 {
767 'url': '__2ABJjxzNo',
768 'info_dict': {
769 'id': '__2ABJjxzNo',
770 'ext': 'mp4',
556dbe7f 771 'duration': 266,
774e208f
PH
772 'upload_date': '20100430',
773 'uploader_id': 'deadmau5',
ec85ded8 774 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
5429d6a9 775 'creator': 'Dada Life, deadmau5',
774e208f
PH
776 'description': 'md5:12c56784b8032162bb936a5f76d55360',
777 'uploader': 'deadmau5',
778 'title': 'Deadmau5 - Some Chords (HD)',
5429d6a9 779 'alt_title': 'This Machine Kills Some Chords',
774e208f
PH
780 },
781 'expected_warnings': [
782 'DASH manifest missing',
783 ]
e52a40ab 784 },
067aa17e 785 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
786 {
787 'url': 'lqQg6PlCWgI',
788 'info_dict': {
789 'id': 'lqQg6PlCWgI',
790 'ext': 'mp4',
556dbe7f 791 'duration': 6085,
90227264 792 'upload_date': '20150827',
cbe2bd91 793 'uploader_id': 'olympic',
ec85ded8 794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 795 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 796 'uploader': 'Olympic',
cbe2bd91
PH
797 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
798 },
799 'params': {
800 'skip_download': 'requires avconv',
e52a40ab 801 }
cbe2bd91 802 },
6271f1ca
PH
803 # Non-square pixels
804 {
805 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
806 'info_dict': {
807 'id': '_b-2C3KPAM0',
808 'ext': 'mp4',
809 'stretched_ratio': 16 / 9.,
556dbe7f 810 'duration': 85,
6271f1ca
PH
811 'upload_date': '20110310',
812 'uploader_id': 'AllenMeow',
ec85ded8 813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 814 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 815 'uploader': '孫ᄋᄅ',
6271f1ca
PH
816 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
817 },
06b491eb
S
818 },
819 # url_encoded_fmt_stream_map is empty string
820 {
821 'url': 'qEJwOuvDf7I',
822 'info_dict': {
823 'id': 'qEJwOuvDf7I',
f57b7835 824 'ext': 'webm',
06b491eb
S
825 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
826 'description': '',
827 'upload_date': '20150404',
828 'uploader_id': 'spbelect',
829 'uploader': 'Наблюдатели Петербурга',
830 },
831 'params': {
832 'skip_download': 'requires avconv',
e323cf3f
S
833 },
834 'skip': 'This live event has ended.',
06b491eb 835 },
067aa17e 836 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
837 {
838 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
839 'info_dict': {
840 'id': 'FIl7x6_3R5Y',
eb6793ba 841 'ext': 'webm',
da77d856
S
842 'title': 'md5:7b81415841e02ecd4313668cde88737a',
843 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 844 'duration': 220,
da77d856
S
845 'upload_date': '20150625',
846 'uploader_id': 'dorappi2000',
ec85ded8 847 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 848 'uploader': 'dorappi2000',
eb6793ba 849 'formats': 'mincount:31',
da77d856 850 },
eb6793ba 851 'skip': 'not actual anymore',
2ee8f5d8 852 },
8a1a26ce
YCH
853 # DASH manifest with segment_list
854 {
855 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
856 'md5': '8ce563a1d667b599d21064e982ab9e31',
857 'info_dict': {
858 'id': 'CsmdDsKjzN8',
859 'ext': 'mp4',
17ee98e1 860 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
861 'uploader': 'Airtek',
862 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
863 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
864 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
865 },
866 'params': {
867 'youtube_include_dash_manifest': True,
868 'format': '135', # bestvideo
be49068d
S
869 },
870 'skip': 'This live event has ended.',
2ee8f5d8 871 },
cf7e015f
S
872 {
873 # Multifeed videos (multiple cameras), URL is for Main Camera
874 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
875 'info_dict': {
876 'id': 'jqWvoWXjCVs',
877 'title': 'teamPGP: Rocket League Noob Stream',
878 'description': 'md5:dc7872fb300e143831327f1bae3af010',
879 },
880 'playlist': [{
881 'info_dict': {
882 'id': 'jqWvoWXjCVs',
883 'ext': 'mp4',
884 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 886 'duration': 7335,
cf7e015f
S
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
ec85ded8 890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 891 'license': 'Standard YouTube License',
cf7e015f
S
892 },
893 }, {
894 'info_dict': {
895 'id': '6h8e8xoXJzg',
896 'ext': 'mp4',
897 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 899 'duration': 7337,
cf7e015f
S
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
ec85ded8 903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 904 'license': 'Standard YouTube License',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
908 'id': 'PUOgX5z9xZw',
909 'ext': 'mp4',
910 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
911 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 912 'duration': 7337,
cf7e015f
S
913 'upload_date': '20150721',
914 'uploader': 'Beer Games Beer',
915 'uploader_id': 'beergamesbeer',
ec85ded8 916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 917 'license': 'Standard YouTube License',
cf7e015f
S
918 },
919 }, {
920 'info_dict': {
921 'id': 'teuwxikvS5k',
922 'ext': 'mp4',
923 'title': 'teamPGP: Rocket League Noob Stream (zim)',
924 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 925 'duration': 7334,
cf7e015f
S
926 'upload_date': '20150721',
927 'uploader': 'Beer Games Beer',
928 'uploader_id': 'beergamesbeer',
ec85ded8 929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 930 'license': 'Standard YouTube License',
cf7e015f
S
931 },
932 }],
933 'params': {
934 'skip_download': True,
935 },
4fe54c12 936 'skip': 'This video is not available.',
cbaed4bb 937 },
f9f49d87 938 {
067aa17e 939 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
940 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
941 'info_dict': {
942 'id': 'gVfLd0zydlo',
943 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
944 },
945 'playlist_count': 2,
be49068d 946 'skip': 'Not multifeed anymore',
f9f49d87 947 },
cbaed4bb 948 {
2d3d2997 949 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 950 'only_matching': True,
0e49d9a6 951 },
6d4fc66b 952 {
2d3d2997 953 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
954 'only_matching': True,
955 },
0e49d9a6 956 {
067aa17e 957 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 958 # Also tests cut-off URL expansion in video description (see
067aa17e
S
959 # https://github.com/ytdl-org/youtube-dl/issues/1892,
960 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
961 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
962 'info_dict': {
963 'id': 'lsguqyKfVQg',
964 'ext': 'mp4',
965 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 966 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 967 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 968 'duration': 133,
0e49d9a6
LL
969 'upload_date': '20151119',
970 'uploader_id': 'IronSoulElf',
ec85ded8 971 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 972 'uploader': 'IronSoulElf',
eb6793ba
S
973 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
974 'track': 'Dark Walk - Position Music',
975 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 976 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
977 },
978 'params': {
979 'skip_download': True,
980 },
981 },
61f92af1 982 {
067aa17e 983 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
984 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
985 'only_matching': True,
986 },
313dfc45
LL
987 {
988 # Video with yt:stretch=17:0
989 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
990 'info_dict': {
991 'id': 'Q39EVAstoRM',
992 'ext': 'mp4',
993 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
994 'description': 'md5:ee18a25c350637c8faff806845bddee9',
995 'upload_date': '20151107',
996 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
997 'uploader': 'CH GAMER DROID',
998 },
999 'params': {
1000 'skip_download': True,
1001 },
be49068d 1002 'skip': 'This video does not exist.',
313dfc45 1003 },
7caf9830
S
1004 {
1005 # Video licensed under Creative Commons
1006 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1007 'info_dict': {
1008 'id': 'M4gD1WSo5mA',
1009 'ext': 'mp4',
1010 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1011 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1012 'duration': 721,
7caf9830
S
1013 'upload_date': '20150127',
1014 'uploader_id': 'BerkmanCenter',
ec85ded8 1015 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1016 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1017 'license': 'Creative Commons Attribution license (reuse allowed)',
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 },
fd050249
S
1023 {
1024 # Channel-like uploader_url
1025 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1026 'info_dict': {
1027 'id': 'eQcmzGIKrzg',
1028 'ext': 'mp4',
1029 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1030 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1031 'duration': 4060,
fd050249 1032 'upload_date': '20151119',
eb6793ba 1033 'uploader': 'Bernie Sanders',
fd050249 1034 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1036 'license': 'Creative Commons Attribution license (reuse allowed)',
1037 },
1038 'params': {
1039 'skip_download': True,
1040 },
1041 },
040ac686
S
1042 {
1043 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1044 'only_matching': True,
7f29cf54
S
1045 },
1046 {
067aa17e 1047 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1048 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1049 'only_matching': True,
6496ccb4
S
1050 },
1051 {
1052 # Rental video preview
1053 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1054 'info_dict': {
1055 'id': 'uGpuVWrhIzE',
1056 'ext': 'mp4',
1057 'title': 'Piku - Trailer',
1058 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1059 'upload_date': '20150811',
1060 'uploader': 'FlixMatrix',
1061 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1063 'license': 'Standard YouTube License',
1064 },
1065 'params': {
1066 'skip_download': True,
1067 },
eb6793ba 1068 'skip': 'This video is not available.',
022a5d66 1069 },
12afdc2a
S
1070 {
1071 # YouTube Red video with episode data
1072 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1073 'info_dict': {
1074 'id': 'iqKdEhx-dD4',
1075 'ext': 'mp4',
1076 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1077 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1078 'duration': 2085,
12afdc2a
S
1079 'upload_date': '20170118',
1080 'uploader': 'Vsauce',
1081 'uploader_id': 'Vsauce',
1082 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1083 'series': 'Mind Field',
1084 'season_number': 1,
1085 'episode_number': 1,
1086 },
1087 'params': {
1088 'skip_download': True,
1089 },
1090 'expected_warnings': [
1091 'Skipping DASH manifest',
1092 ],
1093 },
c7121fa7
S
1094 {
1095 # The following content has been identified by the YouTube community
1096 # as inappropriate or offensive to some audiences.
1097 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1098 'info_dict': {
1099 'id': '6SJNVb0GnPI',
1100 'ext': 'mp4',
1101 'title': 'Race Differences in Intelligence',
1102 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1103 'duration': 965,
1104 'upload_date': '20140124',
1105 'uploader': 'New Century Foundation',
1106 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1108 },
1109 'params': {
1110 'skip_download': True,
1111 },
1112 },
022a5d66
S
1113 {
1114 # itag 212
1115 'url': '1t24XAntNCY',
1116 'only_matching': True,
fd5c4aab
S
1117 },
1118 {
1119 # geo restricted to JP
1120 'url': 'sJL6WA-aGkQ',
1121 'only_matching': True,
1122 },
d0ba5587
S
1123 {
1124 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1125 'only_matching': True,
1126 },
cd5a74a2
S
1127 {
1128 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1129 'only_matching': True,
1130 },
825cd268
RA
1131 {
1132 # DRM protected
1133 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1134 'only_matching': True,
4fe54c12
S
1135 },
1136 {
1137 # Video with unsupported adaptive stream type formats
1138 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1139 'info_dict': {
1140 'id': 'Z4Vy8R84T1U',
1141 'ext': 'mp4',
1142 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1143 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1144 'duration': 433,
1145 'upload_date': '20130923',
1146 'uploader': 'Amelia Putri Harwita',
1147 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1149 'formats': 'maxcount:10',
1150 },
1151 'params': {
1152 'skip_download': True,
1153 'youtube_include_dash_manifest': False,
1154 },
5429d6a9 1155 'skip': 'not actual anymore',
5caabd3c 1156 },
1157 {
822b9d9c 1158 # Youtube Music Auto-generated description
5caabd3c 1159 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1160 'info_dict': {
1161 'id': 'MgNrAu2pzNs',
1162 'ext': 'mp4',
1163 'title': 'Voyeur Girl',
1164 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1165 'upload_date': '20190312',
5429d6a9
S
1166 'uploader': 'Stephen - Topic',
1167 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1168 'artist': 'Stephen',
1169 'track': 'Voyeur Girl',
1170 'album': 'it\'s too much love to know my dear',
1171 'release_date': '20190313',
1172 'release_year': 2019,
1173 },
1174 'params': {
1175 'skip_download': True,
1176 },
1177 },
1178 {
822b9d9c 1179 # Youtube Music Auto-generated description
5caabd3c 1180 # Retrieve 'artist' field from 'Artist:' in video description
1181 # when it is present on youtube music video
5caabd3c 1182 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1183 'info_dict': {
1184 'id': 'k0jLE7tTwjY',
1185 'ext': 'mp4',
1186 'title': 'Latch Feat. Sam Smith',
1187 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1188 'upload_date': '20150110',
1189 'uploader': 'Various Artists - Topic',
1190 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1191 'artist': 'Disclosure',
1192 'track': 'Latch Feat. Sam Smith',
1193 'album': 'Latch Featuring Sam Smith',
1194 'release_date': '20121008',
1195 'release_year': 2012,
1196 },
1197 'params': {
1198 'skip_download': True,
1199 },
1200 },
1201 {
822b9d9c 1202 # Youtube Music Auto-generated description
5caabd3c 1203 # handle multiple artists on youtube music video
1204 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1205 'info_dict': {
1206 'id': '74qn0eJSjpA',
1207 'ext': 'mp4',
1208 'title': 'Eastside',
1209 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1210 'upload_date': '20180710',
1211 'uploader': 'Benny Blanco - Topic',
1212 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1213 'artist': 'benny blanco, Halsey, Khalid',
1214 'track': 'Eastside',
1215 'album': 'Eastside',
1216 'release_date': '20180713',
1217 'release_year': 2018,
1218 },
1219 'params': {
1220 'skip_download': True,
1221 },
1222 },
1223 {
822b9d9c 1224 # Youtube Music Auto-generated description
5caabd3c 1225 # handle youtube music video with release_year and no release_date
1226 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1227 'info_dict': {
1228 'id': '-hcAI0g-f5M',
1229 'ext': 'mp4',
1230 'title': 'Put It On Me',
5429d6a9 1231 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
5caabd3c 1232 'upload_date': '20180426',
1233 'uploader': 'Matt Maeson - Topic',
1234 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1235 'artist': 'Matt Maeson',
1236 'track': 'Put It On Me',
1237 'album': 'The Hearse',
1238 'release_date': None,
1239 'release_year': 2018,
1240 },
1241 'params': {
1242 'skip_download': True,
1243 },
1244 },
66b48727
RA
1245 {
1246 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1247 'only_matching': True,
1248 },
011e75e6
S
1249 {
1250 # invalid -> valid video id redirection
1251 'url': 'DJztXj2GPfl',
1252 'info_dict': {
1253 'id': 'DJztXj2GPfk',
1254 'ext': 'mp4',
1255 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1256 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1257 'upload_date': '20090125',
1258 'uploader': 'Prochorowka',
1259 'uploader_id': 'Prochorowka',
1260 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1261 'artist': 'Panjabi MC',
1262 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1263 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1264 },
1265 'params': {
1266 'skip_download': True,
1267 },
ea74e00b
DP
1268 },
1269 {
1270 # empty description results in an empty string
1271 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1272 'info_dict': {
1273 'id': 'x41yOUIvK2k',
1274 'ext': 'mp4',
1275 'title': 'IMG 3456',
1276 'description': '',
1277 'upload_date': '20170613',
1278 'uploader_id': 'ElevageOrVert',
1279 'uploader': 'ElevageOrVert',
1280 },
1281 'params': {
1282 'skip_download': True,
1283 },
1284 },
2eb88d95
PH
1285 ]
1286
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) -> deciphering function;
    # populated lazily by _decrypt_signature.
    self._player_cache = dict()
e0df6211 1290
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1294
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1298
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for the video.

    video_id -- identifier of the video, used as the message prefix
    format   -- the format that could not be found (interpolated with %s)
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
c5e8d7af
PH
1302
def report_rtmp_download(self):
    """Tell the user that an RTMP download was detected."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1306
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of example_sig.

    For instance a signature shaped like 'abc.de' yields '3.2'.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1310
e40c758c
S
1311 @classmethod
1312 def _extract_player_info(cls, player_url):
1313 for player_re in cls._PLAYER_INFO_RE:
1314 id_m = re.search(player_re, player_url)
1315 if id_m:
1316 break
1317 else:
c081b35c 1318 raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1319 return id_m.group('ext'), id_m.group('id')
1320
1321 def _extract_signature_function(self, video_id, player_url, example_sig):
1322 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1323
c4417ddb 1324 # Read from filesystem cache
60064c53
PH
1325 func_id = '%s_%s_%s' % (
1326 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1327 assert os.path.basename(func_id) == func_id
a0e07d31 1328
69ea8ca4 1329 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1330 if cache_spec is not None:
78caa52a 1331 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1332
6d1a55a5
PH
1333 download_note = (
1334 'Downloading player %s' % player_url
1335 if self._downloader.params.get('verbose') else
1336 'Downloading %s player %s' % (player_type, player_id)
1337 )
e0df6211
PH
1338 if player_type == 'js':
1339 code = self._download_webpage(
1340 player_url, video_id,
6d1a55a5 1341 note=download_note,
69ea8ca4 1342 errnote='Download of %s failed' % player_url)
83799698 1343 res = self._parse_sig_js(code)
c4417ddb 1344 elif player_type == 'swf':
e0df6211
PH
1345 urlh = self._request_webpage(
1346 player_url, video_id,
6d1a55a5 1347 note=download_note,
69ea8ca4 1348 errnote='Download of %s failed' % player_url)
e0df6211 1349 code = urlh.read()
83799698 1350 res = self._parse_sig_swf(code)
e0df6211
PH
1351 else:
1352 assert False, 'Invalid player type %r' % player_type
1353
785521bf
PH
1354 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1355 cache_res = res(test_string)
1356 cache_spec = [ord(c) for c in cache_res]
83799698 1357
69ea8ca4 1358 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1359 return res
1360
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the deciphering function func.

    func is probed with a string whose i-th character has code point i
    (as long as example_sig); the resulting permutation is rendered as a
    sum of index and slice expressions on 's' and shown via to_screen.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions that together reproduce idxs,
        # collapsing runs that change by +1 or -1 into slices.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A descending run that reaches index 0 needs an open end.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs,
        # so its loop variable would be unbound afterwards; handle these
        # short specs directly.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev; emit it and start afresh.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1399
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that deciphers a signature using the JS player code.

    The name of the signature function is located with the regular
    expressions below (group 'sig'), the function is extracted with
    JSInterpreter, and the returned callable passes the signature to it
    as a one-element argument list.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; the second,
        # byte-identical copy was redundant and has been dropped.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
1420
def _parse_sig_swf(self, file_contents):
    """Return a callable that deciphers a signature using the SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from the SWF contents; the returned callable passes the signature to
    it as a one-element argument list.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        return decipher([s])

    return run
1427
83799698 1428 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1429 """Turn the encrypted s field into a working signature"""
6b37f0be 1430
c8bf86d5 1431 if player_url is None:
69ea8ca4 1432 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1433
69ea8ca4 1434 if player_url.startswith('//'):
78caa52a 1435 player_url = 'https:' + player_url
3c90cc8b
S
1436 elif not re.match(r'https?://', player_url):
1437 player_url = compat_urlparse.urljoin(
1438 'https://www.youtube.com', player_url)
c8bf86d5 1439 try:
62af3a0e 1440 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1441 if player_id not in self._player_cache:
1442 func = self._extract_signature_function(
60064c53 1443 video_id, player_url, s
c8bf86d5
PH
1444 )
1445 self._player_cache[player_id] = func
1446 func = self._player_cache[player_id]
1447 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1448 self._print_sig_code(func, s)
c8bf86d5
PH
1449 return func(s)
1450 except Exception as e:
1451 tb = traceback.format_exc()
1452 raise ExtractorError(
78caa52a 1453 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1454
f96f5dda 1455 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
de7f3446 1456 try:
60e47a26 1457 subs_doc = self._download_xml(
38c2e5b8 1458 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1459 video_id, note=False)
1460 except ExtractorError as err:
9b9c5355 1461 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1462 return {}
de7f3446
JMF
1463
1464 sub_lang_list = {}
60e47a26
JMF
1465 for track in subs_doc.findall('track'):
1466 lang = track.attrib['lang_code']
7e660ac1
LD
1467 if lang in sub_lang_list:
1468 continue
360e1ca5 1469 sub_formats = []
23d17e4b 1470 for ext in self._SUBTITLE_FORMATS:
15707c7e 1471 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1472 'lang': lang,
1473 'v': video_id,
1474 'fmt': ext,
1475 'name': track.attrib['name'].encode('utf-8'),
1476 })
1477 sub_formats.append({
1478 'url': 'https://www.youtube.com/api/timedtext?' + params,
1479 'ext': ext,
1480 })
1481 sub_lang_list[lang] = sub_formats
f96f5dda 1482 if has_live_chat_replay:
321bf820 1483 sub_lang_list['live_chat'] = [
1484 {
1485 'video_id': video_id,
1486 'ext': 'json',
1487 'protocol': 'youtube_live_chat_replay',
1488 },
1489 ]
de7f3446 1490 if not sub_lang_list:
69ea8ca4 1491 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1492 return {}
1493 return sub_lang_list
1494
a72778d3
S
1495 def _get_ytplayer_config(self, video_id, webpage):
1496 patterns = (
526b3b07
S
1497 # User data may contain arbitrary character sequences that may affect
1498 # JSON extraction with regex, e.g. when '};' is contained the second
1499 # regex won't capture the whole JSON. Yet working around by trying more
1500 # concrete regex first keeping in mind proper quoted string handling
1501 # to be implemented in future that will replace this workaround (see
067aa17e
S
1502 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1503 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1504 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1505 r';ytplayer\.config\s*=\s*({.+?});',
1506 )
1507 config = self._search_regex(
1508 patterns, webpage, 'ytplayer.config', default=None)
1509 if config:
1510 return self._parse_json(
1511 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1512
def _get_yt_initial_data(self, video_id, webpage):
    """Find the ``ytInitialData`` assignment in *webpage* and parse it.

    Both the ``window["ytInitialData"] = ...`` and the
    ``var ytInitialData = ...`` spellings are tried.  The captured text is
    passed through ``uppercase_escape`` and parsed with
    ``self._parse_json`` (``fatal=False``).  Returns that result, or None
    when neither pattern matches.
    """
    # Each pattern captures lazily up to the first ';' that directly
    # follows a '}'.
    initial_data_patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_data = self._search_regex(
        initial_data_patterns, webpage, 'ytInitialData', default=None)
    if not raw_data:
        return None
    return self._parse_json(
        uppercase_escape(raw_data), video_id, fatal=False)
1521
def _get_automatic_captions(self, video_id, webpage):
    """Build the listing of automatic captions for a video.

    The already downloaded *webpage* is passed in so that the captions URL
    can be looked up without fetching the page a second time.

    Returns a dict mapping a language code to a list of
    ``{'url': ..., 'ext': ...}`` dicts, one per entry of
    ``self._SUBTITLE_FORMATS``.  An empty dict is returned, after a
    warning, when no automatic captions could be located.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}

    def make_captions(sub_url, sub_langs):
        # Derive one URL per (language, format) pair from a single caption
        # URL by overriding its 'tlang' and 'fmt' query parameters.
        url_parts = compat_urllib_parse_urlparse(sub_url)
        query = compat_parse_qs(url_parts.query)
        by_lang = {}
        for lang_code in sub_langs:
            variants = []
            for ext in self._SUBTITLE_FORMATS:
                query['tlang'] = [lang_code]
                query['fmt'] = [ext]
                variants.append({
                    'url': compat_urlparse.urlunparse(url_parts._replace(
                        query=compat_urllib_parse_urlencode(query, True))),
                    'ext': ext,
                })
            by_lang[lang_code] = variants
        return by_lang

    try:
        args = player_config['args']

        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # Ask for the list of available subtitles
            list_query = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            track_list = self._download_xml(
                tts_url + '&' + list_query, video_id)
            source_track = track_list.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            source_lang = source_track.attrib['lang_code']
            source_kind = source_track.attrib.get('kind', '')

            auto_captions = {}
            for target in track_list.findall('target'):
                target_lang = target.attrib['lang_code']
                variants = []
                for ext in self._SUBTITLE_FORMATS:
                    query = compat_urllib_parse_urlencode({
                        'lang': source_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': source_kind,
                    })
                    variants.append({
                        'url': tts_url + '&' + query,
                        'ext': ext,
                    })
                auto_captions[target_lang] = variants
            return auto_captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                for track in renderer['captionTracks']:
                    # A track without 'kind' is not an automatic
                    # transcription
                    if 'kind' not in track:
                        continue
                    base_url = track['baseUrl']
                    lang_codes = []
                    for translation in renderer['translationLanguages']:
                        code = translation.get('languageCode')
                        if code:
                            lang_codes.append(code)
                    # The first automatic track found is the one used
                    return make_captions(base_url, lang_codes)

                self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                return {}

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        legacy_langs = []
        for encoded_lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(
                compat_urllib_parse_unquote_plus(encoded_lang))
            code = lang_qs.get('lc', [None])[0]
            if code:
                legacy_langs.append(code)
        return make_captions(caption_url, legacy_langs)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1630
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL of a video.

    The URL is taken from *player_response* when present there, otherwise
    from *video_info*.  Nothing is done when no valid URL is found.  The
    request is made with ``fatal=False`` and is reported as
    'Marking watched'.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    playback_url = compat_urlparse.urlunparse(url_parts._replace(
        query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1656
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect references to YouTube videos embedded in *webpage*.

    Three kinds of embed are recognised:

    * player URLs (``/embed/``, ``/v/`` or ``/p/`` followed by an 11
      character id) appearing as a quoted value after ``<iframe src=``,
      ``data-video-url=``, ``<embed src=``, ``embedSWF(``,
      ``<object data=`` or ``new SWFObject(``;
    * the ``data-youtube-id`` value of lazyYT elements;
    * the ``data-video_id`` value of ``yvii_single_video_player`` divs
      (Wordpress "YouTube Video Importer" plugin).

    Returns a list of strings: player URLs first, then lazyYT values, then
    plugin values.  The first two kinds are HTML-unescaped.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*``, a
    # typo that demanded a literal ':' and therefore never matched a real
    # ``embedSWF("...")`` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is (q1, q2, video id); only the last group is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1688
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``.

    Returns None when no embed was found.
    """
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1693
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id contained in *url*.

    The URL is matched against ``cls._VALID_URL`` (compiled in verbose
    mode); the id is the second capturing group.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1701
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Extract chapters from the ``ytInitialData`` JSON embedded in *webpage*.

    Each chapter runs from its own start time to the start time of the
    next entry; the last one ends at *duration*.  Entries whose start or
    end time cannot be determined are left out.

    Returns a list of dicts with 'start_time', 'end_time' (seconds) and
    'title', or None when the page, the JSON or the chapter list is
    missing.
    """
    if not webpage:
        return
    raw_initial_data = self._search_regex(
        r'window\["ytInitialData"\] = (.+);\n', webpage,
        'player args', default='{}')
    initial_data = self._parse_json(raw_initial_data, video_id, fatal=False)
    if not (initial_data and isinstance(initial_data, dict)):
        return
    chapters_list = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def start_seconds(entry):
        # timeRangeStartMillis is in milliseconds; scale to seconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = start_seconds(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = start_seconds(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1750
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Extract chapters from seek links in an HTML video description.

    A chapter line is a ``<br />``-delimited line containing an ``<a>``
    whose ``onclick`` calls ``yt.www.watch.player.seekTo`` and whose text
    is a time point (``m:s`` or ``h:m:s``).  Each chapter runs to the time
    point of the next line; the last one ends at *duration*.

    *duration* is the video length in seconds and may be None when it is
    unknown.  In that case no upper bound is applied and the last chapter,
    whose end cannot be determined, is left out.

    Returns a list of dicts with 'start_time', 'end_time' and 'title', or
    None when there is no description or no chapter line in it.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        # Comparing a number with None raises TypeError on Python 3, so
        # the duration bound is only applied when the duration is known.
        if duration is not None and start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration is not None and end_time > duration:
            end_time = duration
        # Time points going backwards mean the remaining lines are not a
        # chapter list
        if start_time > end_time:
            break
        # The title is the line without its link, trimmed of separators
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1785
84213ea8
S
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return the chapters of a video.

    The JSON embedded in *webpage* is tried first; when it yields nothing
    the result of parsing *description* is returned instead.
    """
    chapters = self._extract_chapters_from_json(webpage, video_id, duration)
    if chapters:
        return chapters
    return self._extract_chapters_from_description(description, duration)
1789
c5e8d7af 1790 def _real_extract(self, url):
cf7e015f
S
1791 url, smuggled_data = unsmuggle_url(url, {})
1792
7e8c0af0 1793 proto = (
78caa52a
PH
1794 'http' if self._downloader.params.get('prefer_insecure', False)
1795 else 'https')
7e8c0af0 1796
7c80519c 1797 start_time = None
297a564b 1798 end_time = None
7c80519c
JMF
1799 parsed_url = compat_urllib_parse_urlparse(url)
1800 for component in [parsed_url.fragment, parsed_url.query]:
1801 query = compat_parse_qs(component)
297a564b 1802 if start_time is None and 't' in query:
7c80519c 1803 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1804 if start_time is None and 'start' in query:
1805 start_time = parse_duration(query['start'][0])
297a564b
JMF
1806 if end_time is None and 'end' in query:
1807 end_time = parse_duration(query['end'][0])
7c80519c 1808
c5e8d7af
PH
1809 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1810 mobj = re.search(self._NEXT_URL_RE, url)
1811 if mobj:
7fd002c0 1812 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1813 video_id = self.extract_id(url)
c5e8d7af
PH
1814
1815 # Get video webpage
aa79ac0c 1816 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1817 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1818
1819 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1820 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1821
1822 # Attempt to extract SWF player URL
e0df6211 1823 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1824 if mobj is not None:
1825 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1826 else:
1827 player_url = None
1828
d8d24a92
S
1829 dash_mpds = []
1830
1831 def add_dash_mpd(video_info):
1832 dash_mpd = video_info.get('dashmpd')
1833 if dash_mpd and dash_mpd[0] not in dash_mpds:
1834 dash_mpds.append(dash_mpd[0])
1835
561b456e
S
1836 def add_dash_mpd_pr(pl_response):
1837 dash_mpd = url_or_none(try_get(
1838 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1839 compat_str))
1840 if dash_mpd and dash_mpd not in dash_mpds:
1841 dash_mpds.append(dash_mpd)
1842
c7121fa7
S
1843 is_live = None
1844 view_count = None
1845
1846 def extract_view_count(v_info):
1847 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1848
c2d125d9
S
1849 def extract_player_response(player_response, video_id):
1850 pl_response = str_or_none(player_response)
1851 if not pl_response:
1852 return
1853 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1854 if isinstance(pl_response, dict):
1855 add_dash_mpd_pr(pl_response)
1856 return pl_response
1857
dbdaaa23
S
1858 player_response = {}
1859
c5e8d7af 1860 # Get video info
43ebf77d 1861 video_info = {}
6449cd80 1862 embed_webpage = None
39e7107d
U
1863 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1864 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1865 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1866 age_gate = True
1867 # We simulate the access to the video from www.youtube.com/v/{video_id}
1868 # this can be viewed without login into Youtube
beb95e77
CL
1869 url = proto + '://www.youtube.com/embed/%s' % video_id
1870 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
9d9314cb
U
1871 # check if video is only playable on youtube - if so it requires auth (cookies)
1872 if re.search(r'player-unavailable">', embed_webpage) is not None:
c73baf23
U
1873 '''
1874 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1875 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1876 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1877 '''
1878 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1879 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1880 age_gate = False
1881 # Try looking directly into the video webpage
1882 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1883 if ytplayer_config:
1884 args = ytplayer_config['args']
1885 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1886 # Convert to the same format returned by compat_parse_qs
1887 video_info = dict((k, [v]) for k, v in args.items())
1888 add_dash_mpd(video_info)
1889 # Rental video is not rented but preview is available (e.g.
1890 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1891 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1892 if not video_info and args.get('ypc_vid'):
1893 return self.url_result(
1894 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1895 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1896 is_live = True
1897 if not player_response:
1898 player_response = extract_player_response(args.get('player_response'), video_id)
1899 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1900 add_dash_mpd_pr(player_response)
9d9314cb
U
1901 else:
1902 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1903 else:
1904 data = compat_urllib_parse_urlencode({
1905 'video_id': video_id,
1906 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1907 'sts': self._search_regex(
1908 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1909 })
1910 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1911 try:
1912 video_info_webpage = self._download_webpage(
1913 video_info_url, video_id,
1914 note='Refetching age-gated info webpage',
1915 errnote='unable to download video info webpage')
1916 except ExtractorError:
1917 video_info_webpage = None
1918 if video_info_webpage:
1919 video_info = compat_parse_qs(video_info_webpage)
1920 pl_response = video_info.get('player_response', [None])[0]
1921 player_response = extract_player_response(pl_response, video_id)
1922 add_dash_mpd(video_info)
1923 view_count = extract_view_count(video_info)
c108eb73
JMF
1924 else:
1925 age_gate = False
d8d24a92 1926 # Try looking directly into the video webpage
a72778d3
S
1927 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1928 if ytplayer_config:
4e62ebe2 1929 args = ytplayer_config['args']
4c76aa06 1930 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1931 # Convert to the same format returned by compat_parse_qs
1932 video_info = dict((k, [v]) for k, v in args.items())
1933 add_dash_mpd(video_info)
6496ccb4
S
1934 # Rental video is not rented but preview is available (e.g.
1935 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1936 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1937 if not video_info and args.get('ypc_vid'):
1938 return self.url_result(
1939 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1940 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1941 is_live = True
dbdaaa23 1942 if not player_response:
c2d125d9 1943 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1944 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1945 add_dash_mpd_pr(player_response)
bbb7c3f7
YCH
1946
1947 def extract_unavailable_message():
0add33ab
S
1948 messages = []
1949 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1950 msg = self._html_search_regex(
1951 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1952 video_webpage, 'unavailable %s' % kind, default=None)
1953 if msg:
1954 messages.append(msg)
1955 if messages:
1956 return '\n'.join(messages)
bbb7c3f7 1957
f93abcf1 1958 if not video_info and not player_response:
15be3eb5
RA
1959 unavailable_message = extract_unavailable_message()
1960 if not unavailable_message:
1961 unavailable_message = 'Unable to extract video data'
1962 raise ExtractorError(
1963 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1964
f93abcf1
S
1965 if not isinstance(video_info, dict):
1966 video_info = {}
1967
dbdaaa23
S
1968 video_details = try_get(
1969 player_response, lambda x: x['videoDetails'], dict) or {}
1970
37357d21
S
1971 microformat = try_get(
1972 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1973
8dbf751a
RA
1974 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1975 if not video_title:
cf7e015f
S
1976 self._downloader.report_warning('Unable to extract video title')
1977 video_title = '_'
1978
9cafc3fd 1979 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1980 if video_description:
fa4bc6e7
RA
1981
1982 def replace_url(m):
1983 redir_url = compat_urlparse.urljoin(url, m.group(1))
1984 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1985 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1986 qs = compat_parse_qs(parsed_redir_url.query)
1987 q = qs.get('q')
1988 if q and q[0]:
1989 return q[0]
1990 return redir_url
1991
9cafc3fd 1992 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1993 <a\s+
25cb7a0e 1994 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1995 (?:title|href)="([^"]+)"\s+
25cb7a0e 1996 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1997 class="[^"]*"[^>]*>
23f13e97 1998 [^<]+\.{3}\s*
cf7e015f 1999 </a>
fa4bc6e7 2000 ''', replace_url, video_description)
cf7e015f
S
2001 video_description = clean_html(video_description)
2002 else:
ea74e00b
DP
2003 video_description = video_details.get('shortDescription')
2004 if video_description is None:
2005 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 2006
8fe10494 2007 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 2008 if not self._downloader.params.get('noplaylist'):
8fe10494
S
2009 multifeed_metadata_list = try_get(
2010 player_response,
2011 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2012 compat_str) or try_get(
2013 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2014 if multifeed_metadata_list:
2015 entries = []
2016 feed_ids = []
2017 for feed in multifeed_metadata_list.split(','):
2018 # Unquote should take place before split on comma (,) since textual
2019 # fields may contain comma as well (see
067aa17e 2020 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 2021 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2022
2023 def feed_entry(name):
2024 return try_get(feed_data, lambda x: x[name][0], compat_str)
2025
2026 feed_id = feed_entry('id')
2027 if not feed_id:
2028 continue
2029 feed_title = feed_entry('title')
2030 title = video_title
2031 if feed_title:
2032 title += ' (%s)' % feed_title
8fe10494
S
2033 entries.append({
2034 '_type': 'url_transparent',
2035 'ie_key': 'Youtube',
2036 'url': smuggle_url(
2037 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2038 {'force_singlefeed': True}),
6b09401b 2039 'title': title,
8fe10494 2040 })
6b09401b 2041 feed_ids.append(feed_id)
8fe10494
S
2042 self.to_screen(
2043 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2044 % (', '.join(feed_ids), video_id))
2045 return self.playlist_result(entries, video_id, video_title, video_description)
2046 else:
2047 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2048
c7121fa7 2049 if view_count is None:
1c9c8de2 2050 view_count = extract_view_count(video_info)
dbdaaa23
S
2051 if view_count is None and video_details:
2052 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
2053 if view_count is None and microformat:
2054 view_count = int_or_none(microformat.get('viewCount'))
1d699755 2055
27019dbb 2056 if is_live is None:
898238e9 2057 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 2058
321bf820 2059 has_live_chat_replay = False
f0f76a33 2060 if not is_live:
321bf820 2061 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2062 try:
2063 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2064 has_live_chat_replay = True
f0f76a33 2065 except (KeyError, IndexError, TypeError):
321bf820 2066 pass
2067
c5e8d7af
PH
2068 # Check for "rental" videos
2069 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 2070 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 2071
c63ca0ee
S
2072 def _extract_filesize(media_url):
2073 return int_or_none(self._search_regex(
2074 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2075
bf1317d2
S
2076 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2077 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2078
c5e8d7af
PH
2079 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2080 self.report_rtmp_download()
dd27fd17
PH
2081 formats = [{
2082 'format_id': '_rtmp',
2083 'protocol': 'rtmp',
2084 'url': video_info['conn'][0],
2085 'player_url': player_url,
2086 }]
bf1317d2 2087 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 2088 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 2089 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 2090 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 2091 formats = []
3318832e 2092 formats_spec = {}
82156fdb 2093 fmt_list = video_info.get('fmt_list', [''])[0]
2094 if fmt_list:
2095 for fmt in fmt_list.split(','):
2096 spec = fmt.split('/')
3318832e 2097 if len(spec) > 1:
2098 width_height = spec[1].split('x')
2099 if len(width_height) == 2:
2100 formats_spec[spec[0]] = {
2101 'resolution': spec[1],
2102 'width': int_or_none(width_height[0]),
2103 'height': int_or_none(width_height[1]),
2104 }
bf1317d2
S
2105 for fmt in streaming_formats:
2106 itag = str_or_none(fmt.get('itag'))
2107 if not itag:
201e9eaa 2108 continue
bf1317d2
S
2109 quality = fmt.get('quality')
2110 quality_label = fmt.get('qualityLabel') or quality
2111 formats_spec[itag] = {
2112 'asr': int_or_none(fmt.get('audioSampleRate')),
2113 'filesize': int_or_none(fmt.get('contentLength')),
2114 'format_note': quality_label,
2115 'fps': int_or_none(fmt.get('fps')),
2116 'height': int_or_none(fmt.get('height')),
bf1317d2
S
2117 # bitrate for itag 43 is always 2147483647
2118 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2119 'width': int_or_none(fmt.get('width')),
2120 }
2121
2122 for fmt in streaming_formats:
00eb865b 2123 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2124 continue
2125 url = url_or_none(fmt.get('url'))
2126
2127 if not url:
fa3db383 2128 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2129 if not cipher:
2130 continue
2131 url_data = compat_parse_qs(cipher)
2132 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2133 if not url:
2134 continue
2135 else:
2136 cipher = None
2137 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2138
2f483bc1
S
2139 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2140 # Unsupported FORMAT_STREAM_TYPE_OTF
2141 if stream_type == 3:
2142 continue
6449cd80 2143
bf1317d2
S
2144 format_id = fmt.get('itag') or url_data['itag'][0]
2145 if not format_id:
2146 continue
2147 format_id = compat_str(format_id)
a49eccdf 2148
bf1317d2
S
2149 if cipher:
2150 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2151 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2152 jsplayer_url_json = self._search_regex(
2153 ASSETS_RE,
2154 embed_webpage if age_gate else video_webpage,
2155 'JS player URL (1)', default=None)
2156 if not jsplayer_url_json and not age_gate:
2157 # We need the embed website after all
2158 if embed_webpage is None:
2159 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2160 embed_webpage = self._download_webpage(
2161 embed_url, video_id, 'Downloading embed webpage')
2162 jsplayer_url_json = self._search_regex(
2163 ASSETS_RE, embed_webpage, 'JS player URL')
2164
2165 player_url = json.loads(jsplayer_url_json)
cf010131 2166 if player_url is None:
bf1317d2
S
2167 player_url_json = self._search_regex(
2168 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2169 video_webpage, 'age gate player URL')
2170 player_url = json.loads(player_url_json)
2171
2172 if 'sig' in url_data:
2173 url += '&signature=' + url_data['sig'][0]
2174 elif 's' in url_data:
2175 encrypted_sig = url_data['s'][0]
2176
2177 if self._downloader.params.get('verbose'):
2178 if player_url is None:
bf1317d2 2179 player_desc = 'unknown'
cf010131 2180 else:
e40c758c
S
2181 player_type, player_version = self._extract_player_info(player_url)
2182 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2183 parts_sizes = self._signature_cache_id(encrypted_sig)
2184 self.to_screen('{%s} signature length %s, %s' %
2185 (format_id, parts_sizes, player_desc))
2186
2187 signature = self._decrypt_signature(
2188 encrypted_sig, video_id, player_url, age_gate)
2189 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2190 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2191 if 'ratebypass' not in url:
2192 url += '&ratebypass=yes'
c9afb51c 2193
94278f72
YCH
2194 dct = {
2195 'format_id': format_id,
2196 'url': url,
2197 'player_url': player_url,
2198 }
2199 if format_id in self._formats:
2200 dct.update(self._formats[format_id])
3318832e 2201 if format_id in formats_spec:
2202 dct.update(formats_spec[format_id])
94278f72 2203
aabc2be6 2204 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2205 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2206 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2207 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2208 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2209
bf1317d2
S
2210 if width is None:
2211 width = int_or_none(fmt.get('width'))
2212 if height is None:
2213 height = int_or_none(fmt.get('height'))
2214
c63ca0ee
S
2215 filesize = int_or_none(url_data.get(
2216 'clen', [None])[0]) or _extract_filesize(url)
2217
bf1317d2
S
2218 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2219 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2220
4878759f
S
2221 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2222 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2223 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2224
94278f72 2225 more_fields = {
c63ca0ee 2226 'filesize': filesize,
bf1317d2 2227 'tbr': tbr,
c9afb51c
AH
2228 'width': width,
2229 'height': height,
bf1317d2
S
2230 'fps': fps,
2231 'format_note': quality_label or quality,
c9afb51c 2232 }
94278f72
YCH
2233 for key, value in more_fields.items():
2234 if value:
2235 dct[key] = value
bf1317d2 2236 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2237 if type_:
2238 type_split = type_.split(';')
2239 kind_ext = type_split[0].split('/')
2240 if len(kind_ext) == 2:
94278f72
YCH
2241 kind, _ = kind_ext
2242 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2243 if kind in ('audio', 'video'):
2244 codecs = None
2245 for mobj in re.finditer(
2246 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2247 if mobj.group('key') == 'codecs':
2248 codecs = mobj.group('val')
2249 break
2250 if codecs:
6310acf5 2251 dct.update(parse_codecs(codecs))
e4a60912
S
2252 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2253 dct['downloader_options'] = {
2254 # Youtube throttles chunks >~10M
2255 'http_chunk_size': 10485760,
2256 }
aabc2be6 2257 formats.append(dct)
c5e8d7af 2258 else:
c3e54389
S
2259 manifest_url = (
2260 url_or_none(try_get(
2261 player_response,
2262 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2263 compat_str))
2264 or url_or_none(try_get(
c3e54389
S
2265 video_info, lambda x: x['hlsvp'][0], compat_str)))
2266 if manifest_url:
2267 formats = []
2268 m3u8_formats = self._extract_m3u8_formats(
2269 manifest_url, video_id, 'mp4', fatal=False)
2270 for a_format in m3u8_formats:
2271 itag = self._search_regex(
2272 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2273 if itag:
2274 a_format['format_id'] = itag
2275 if itag in self._formats:
2276 dct = self._formats[itag].copy()
2277 dct.update(a_format)
2278 a_format = dct
2279 a_format['player_url'] = player_url
2280 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2281 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2282 if self._downloader.params.get('youtube_include_hls_manifest', True):
2283 formats.append(a_format)
c3e54389 2284 else:
13577349 2285 error_message = extract_unavailable_message()
c3e54389 2286 if not error_message:
13577349
S
2287 error_message = clean_html(try_get(
2288 player_response, lambda x: x['playabilityStatus']['reason'],
2289 compat_str))
2290 if not error_message:
2291 error_message = clean_html(
2292 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2293 if error_message:
2294 raise ExtractorError(error_message, expected=True)
2295 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2296
7e72694b 2297 # uploader
dbdaaa23
S
2298 video_uploader = try_get(
2299 video_info, lambda x: x['author'][0],
2300 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2301 if video_uploader:
2302 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2303 else:
2304 self._downloader.report_warning('unable to extract uploader name')
2305
2306 # uploader_id
2307 video_uploader_id = None
2308 video_uploader_url = None
2309 mobj = re.search(
2310 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2311 video_webpage)
2312 if mobj is not None:
2313 video_uploader_id = mobj.group('uploader_id')
2314 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2315 else:
2316 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2317 if owner_profile_url:
2318 video_uploader_id = self._search_regex(
2319 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2320 default=None)
2321 video_uploader_url = owner_profile_url
7e72694b 2322
b45a9e69 2323 channel_id = (
3089bc74
S
2324 str_or_none(video_details.get('channelId'))
2325 or self._html_search_meta(
2326 'channelId', video_webpage, 'channel id', default=None)
2327 or self._search_regex(
b45a9e69 2328 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2329 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2330 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2331
b477fc13
S
2332 thumbnails = []
2333 thumbnails_list = try_get(
2334 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2335 for t in thumbnails_list:
2336 if not isinstance(t, dict):
2337 continue
2338 thumbnail_url = url_or_none(t.get('url'))
2339 if not thumbnail_url:
2340 continue
2341 thumbnails.append({
2342 'url': thumbnail_url,
2343 'width': int_or_none(t.get('width')),
2344 'height': int_or_none(t.get('height')),
2345 })
2346
2347 if not thumbnails:
7e72694b 2348 video_thumbnail = None
b477fc13
S
2349 # We try first to get a high quality image:
2350 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2351 video_webpage, re.DOTALL)
2352 if m_thumb is not None:
2353 video_thumbnail = m_thumb.group(1)
2354 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2355 if thumbnail_url:
2356 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2357 if video_thumbnail:
2358 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2359
2360 # upload date
2361 upload_date = self._html_search_meta(
2362 'datePublished', video_webpage, 'upload date', default=None)
2363 if not upload_date:
2364 upload_date = self._search_regex(
2365 [r'(?s)id="eow-date.*?>(.*?)</span>',
2366 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2367 video_webpage, 'upload date', default=None)
37357d21
S
2368 if not upload_date:
2369 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2370 upload_date = unified_strdate(upload_date)
2371
2372 video_license = self._html_search_regex(
2373 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2374 video_webpage, 'license', default=None)
2375
2376 m_music = re.search(
2377 r'''(?x)
2378 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2379 <ul[^>]*>\s*
2380 <li>(?P<title>.+?)
2381 by (?P<creator>.+?)
2382 (?:
2383 \(.+?\)|
2384 <a[^>]*
2385 (?:
2386 \bhref=["\']/red[^>]*>| # drop possible
2387 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2388 )
2389 .*?
2390 )?</li
2391 ''',
2392 video_webpage)
2393 if m_music:
2394 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2395 video_creator = clean_html(m_music.group('creator'))
2396 else:
2397 video_alt_title = video_creator = None
2398
2399 def extract_meta(field):
2400 return self._html_search_regex(
2401 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2402 video_webpage, field, default=None)
2403
2404 track = extract_meta('Song')
2405 artist = extract_meta('Artist')
92bc97d3 2406 album = extract_meta('Album')
822b9d9c
RA
2407
2408 # Youtube Music Auto-generated description
92bc97d3 2409 release_date = release_year = None
822b9d9c
RA
2410 if video_description:
2411 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2412 if mobj:
2413 if not track:
2414 track = mobj.group('track').strip()
2415 if not artist:
2416 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2417 if not album:
2418 album = mobj.group('album'.strip())
822b9d9c
RA
2419 release_year = mobj.group('release_year')
2420 release_date = mobj.group('release_date')
2421 if release_date:
2422 release_date = release_date.replace('-', '')
2423 if not release_year:
2424 release_year = int(release_date[:4])
2425 if release_year:
2426 release_year = int(release_year)
7e72694b
S
2427
2428 m_episode = re.search(
2429 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2430 video_webpage)
2431 if m_episode:
c2dd2dc0 2432 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2433 season_number = int(m_episode.group('season'))
2434 episode_number = int(m_episode.group('episode'))
2435 else:
2436 series = season_number = episode_number = None
2437
2438 m_cat_container = self._search_regex(
2439 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2440 video_webpage, 'categories', default=None)
dbeafce5 2441 category = None
7e72694b
S
2442 if m_cat_container:
2443 category = self._html_search_regex(
2444 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2445 default=None)
dbeafce5
S
2446 if not category:
2447 category = try_get(
2448 microformat, lambda x: x['category'], compat_str)
2449 video_categories = None if category is None else [category]
7e72694b
S
2450
2451 video_tags = [
2452 unescapeHTML(m.group('content'))
2453 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2454 if not video_tags:
2455 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2456
2457 def _extract_count(count_name):
2458 return str_to_int(self._search_regex(
a6c666d0 2459 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
7e72694b
S
2460 % re.escape(count_name),
2461 video_webpage, count_name, default=None))
2462
2463 like_count = _extract_count('like')
2464 dislike_count = _extract_count('dislike')
2465
dbdaaa23
S
2466 if view_count is None:
2467 view_count = str_to_int(self._search_regex(
2468 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2469 'view count', default=None))
2470
bf3c9326
S
2471 average_rating = (
2472 float_or_none(video_details.get('averageRating'))
2473 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2474
7e72694b 2475 # subtitles
321bf820 2476 video_subtitles = self.extract_subtitles(
2477 video_id, video_webpage, has_live_chat_replay)
7e72694b
S
2478 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2479
2480 video_duration = try_get(
2481 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2482 if not video_duration:
2483 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2484 if not video_duration:
2485 video_duration = parse_duration(self._html_search_meta(
2486 'duration', video_webpage, 'video duration'))
2487
b84071c0
JP
2488 # Get Subscriber Count of channel
2489 subscriber_count = parse_count(self._search_regex(
2490 r'"text":"([\d\.]+\w?) subscribers"',
2491 video_webpage,
2492 'subscriber count',
2493 default=None
2494 ))
2495
7e72694b
S
2496 # annotations
2497 video_annotations = None
2498 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2499 xsrf_token = self._search_regex(
2500 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2501 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2502 invideo_url = try_get(
2503 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2504 if xsrf_token and invideo_url:
2505 xsrf_field_name = self._search_regex(
2506 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2507 video_webpage, 'xsrf field name',
2508 group='xsrf_field_name', default='session_token')
2509 video_annotations = self._download_webpage(
2510 self._proto_relative_url(invideo_url),
2511 video_id, note='Downloading annotations',
2512 errnote='Unable to download video annotations', fatal=False,
2513 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2514
84213ea8 2515 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2516
dd27fd17 2517 # Look for the DASH manifest
203fb43f 2518 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2519 dash_mpd_fatal = True
8ff648e4 2520 for mpd_url in dash_mpds:
d8d24a92 2521 dash_formats = {}
774e208f 2522 try:
05d0d131
YCH
2523 def decrypt_sig(mobj):
2524 s = mobj.group(1)
2525 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2526 return '/signature/%s' % dec_s
2527
8ff648e4 2528 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2529
8ff648e4 2530 for df in self._extract_mpd_formats(
2531 mpd_url, video_id, fatal=dash_mpd_fatal,
2532 formats_dict=self._formats):
c63ca0ee
S
2533 if not df.get('filesize'):
2534 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2535 # Do not overwrite DASH format found in some previous DASH manifest
2536 if df['format_id'] not in dash_formats:
2537 dash_formats[df['format_id']] = df
77c6fb5b
S
2538 # Additional DASH manifests may end up in HTTP Error 403 therefore
2539 # allow them to fail without bug report message if we already have
2540 # some DASH manifest succeeded. This is temporary workaround to reduce
2541 # burst of bug reports until we figure out the reason and whether it
2542 # can be fixed at all.
2543 dash_mpd_fatal = False
774e208f
PH
2544 except (ExtractorError, KeyError) as e:
2545 self.report_warning(
2546 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2547 if dash_formats:
04b3b3df
JMF
2548 # Remove the formats we found through non-DASH, they
2549 # contain less info and it can be wrong, because we use
2550 # fixed values (for example the resolution). See
067aa17e 2551 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2552 # example.
d80265cc 2553 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2554 formats.extend(dash_formats.values())
d80044c2 2555
6271f1ca
PH
2556 # Check for malformed aspect ratio
2557 stretched_m = re.search(
2558 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2559 video_webpage)
2560 if stretched_m:
313dfc45
LL
2561 w = float(stretched_m.group('w'))
2562 h = float(stretched_m.group('h'))
5faf9fed
S
2563 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2564 # We will only process correct ratios.
313dfc45 2565 if w > 0 and h > 0:
41f24c32 2566 ratio = w / h
313dfc45
LL
2567 for f in formats:
2568 if f.get('vcodec') != 'none':
2569 f['stretched_ratio'] = ratio
6271f1ca 2570
026fbedc 2571 if not formats:
43ebf77d
S
2572 if 'reason' in video_info:
2573 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2574 regions_allowed = self._html_search_meta(
2575 'regionsAllowed', video_webpage, default=None)
2576 countries = regions_allowed.split(',') if regions_allowed else None
2577 self.raise_geo_restricted(
2578 msg=video_info['reason'][0], countries=countries)
2579 reason = video_info['reason'][0]
2580 if 'Invalid parameters' in reason:
2581 unavailable_message = extract_unavailable_message()
2582 if unavailable_message:
2583 reason = unavailable_message
2584 raise ExtractorError(
2585 'YouTube said: %s' % reason,
2586 expected=True, video_id=video_id)
2587 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2588 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2589
4bcc7bd1 2590 self._sort_formats(formats)
4ea3be0a 2591
21c340b8 2592 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2593
4ea3be0a 2594 return {
8bcc8756
JW
2595 'id': video_id,
2596 'uploader': video_uploader,
2597 'uploader_id': video_uploader_id,
fd050249 2598 'uploader_url': video_uploader_url,
dd4c4492
S
2599 'channel_id': channel_id,
2600 'channel_url': channel_url,
8bcc8756 2601 'upload_date': upload_date,
7caf9830 2602 'license': video_license,
936784b2 2603 'creator': video_creator or artist,
8bcc8756 2604 'title': video_title,
936784b2 2605 'alt_title': video_alt_title or track,
b477fc13 2606 'thumbnails': thumbnails,
8bcc8756
JW
2607 'description': video_description,
2608 'categories': video_categories,
000b6b5a 2609 'tags': video_tags,
8bcc8756 2610 'subtitles': video_subtitles,
360e1ca5 2611 'automatic_captions': automatic_captions,
8bcc8756
JW
2612 'duration': video_duration,
2613 'age_limit': 18 if age_gate else 0,
2614 'annotations': video_annotations,
9cafc3fd 2615 'chapters': chapters,
7e8c0af0 2616 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2617 'view_count': view_count,
4ea3be0a 2618 'like_count': like_count,
2619 'dislike_count': dislike_count,
bf3c9326 2620 'average_rating': average_rating,
8bcc8756 2621 'formats': formats,
2fe1ff85 2622 'is_live': is_live,
7c80519c 2623 'start_time': start_time,
297a564b 2624 'end_time': end_time,
12afdc2a
S
2625 'series': series,
2626 'season_number': season_number,
2627 'episode_number': episode_number,
936784b2
S
2628 'track': track,
2629 'artist': artist,
5caabd3c 2630 'album': album,
2631 'release_date': release_date,
2632 'release_year': release_year,
b84071c0 2633 'subscriber_count': subscriber_count,
4ea3be0a 2634 }
c5e8d7af 2635
5f6a1245 2636
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists (regular playlists and mixes).

    Matches playlist/watch/embed URLs that carry a playlist id as well as
    bare playlist ids (see ``_VALID_URL``).
    """

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login routine (``self._login()``) during initialization."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in a playlist page.

        Tags carrying a ``data-video-id`` attribute are read first; the
        shared ``extract_videos_from_page_impl`` helper is then invoked with
        the legacy ``_VIDEO_RE`` and two relaxed patterns, receiving the same
        two accumulator lists.

        Returns the result of ``zip(ids, titles)``.
        """
        found_ids, found_titles = [], []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag_mobj in re.finditer(tag_re, page):
            tag_attrs = extract_attributes(tag_mobj.group(1))
            vid = tag_attrs['data-video-id']
            title = unescapeHTML(tag_attrs.get('data-title'))
            found_ids.append(vid)
            # Only strip non-empty titles; None / '' are stored untouched
            found_titles.append(title.strip() if title else title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, found_ids, found_titles)

        return zip(found_ids, found_titles)

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist by repeatedly loading its watch pages.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id. Each watch page is requested for the last
        video id collected so far, and iteration stops as soon as a page
        contributes no unseen ids.
        """
        # The pattern only depends on the playlist id, so build it once
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected = []
        seed_id = playlist_id[-11:]
        page_num = 0
        while True:
            page_num += 1
            webpage = self._download_webpage(
                'https://www.youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
                playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in collected]
            if not fresh:
                break
            collected.extend(fresh)
            seed_id = collected[-1]

        url_results = self._ids_to_results(collected)

        # The title is looked up in the last downloaded page: the first
        # class name that yields a truthy element wins.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download a playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when the page has no playlist title and ``self._entries`` yields
        nothing.

        Raises ExtractorError (expected) when an alert on the page reports a
        missing or private playlist, or invalid parameters.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            state_mobj = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if state_mobj:
                reason = state_mobj.group('reason')
                hint = ', use --username or --netrc to access it' if 'private' in reason else ''
                raise ExtractorError(
                    'This playlist %s' % reason + hint + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        owner_mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if owner_mobj:
            uploader_id = owner_mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, owner_mobj.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            exhausted = object()
            has_videos = next(self._entries(page, playlist_id), exhausted) is not exhausted

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether a single video should be handled instead of the playlist.

        Returns ``(video_id, result)``: ``video_id`` is the video id found in
        the URL (or None), and ``result`` is a url_result for that video only
        when the 'noplaylist' param is set, otherwise None.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Extract a playlist, a mix or a single video from ``url``.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        # Extract playlist id
        url_mobj = re.match(self._VALID_URL, url)
        if url_mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_mobj.group(1)
        if not playlist_id:
            playlist_id = url_mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if not has_videos and video_id:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2998
c5e8d7af 2999
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube channel URLs (``/channel/<id>``)."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs claimed by YoutubePlaylistsIE or YoutubeLiveIE.

        Any other URL is judged by the parent class implementation.
        """
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return ``_TEMPLATE_URL`` filled with ``channel_id`` (``url`` is not used here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the videos of a channel.

        Redirects to the channel's 'UU...' uploads playlist when a 'UC...'
        channel id can be read from the channel page; otherwise the channel
        page itself is used for extraction.

        Raises ExtractorError (expected) when the page yields no entries and
        carries an alert message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        probe_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if probe_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to form the playlist id
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mobj = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mobj is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for vid, vtitle in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    vid, 'Youtube', video_id=vid, video_title=vtitle))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface the page's alert message, if any
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
648e6a1f 3098 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
3099
3100
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for /user/<name>, /c/<name>, bare /<name> and ytuser: URLs."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    # Filled with (path kind, name), see _build_template_url().
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Tell whether this extractor should handle ``url``.

        The regex of this extractor is too permissive and would also match
        URLs that belong to other YouTube extractors, so any URL accepted by
        another module-level ``Youtube*IE`` class is rejected here.
        """
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the /videos listing URL for the user named in ``url``.

        The path kind ('user' or 'c') is taken from the URL and defaults to
        'user' when the URL does not contain one. ``channel_id`` is not used;
        the name is re-read from the regex match.
        """
        mobj = re.match(self._VALID_URL, url)
        path_kind = mobj.group('user') or 'user'
        user_name = mobj.group('id')
        return self._TEMPLATE_URL % (path_kind, user_name)
3158
b05654f0 3159
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the '/live' URL of a user or channel."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a '/live' URL to the video it shows, else to the base URL.

        The page is downloaded non-fatally. When it is a video page with an
        11-character video id, that video is returned; in every other case
        (including a failed download) the URL without '/live' is returned.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id'), mobj.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3210
3211
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the '/playlists' page of a user, channel or custom URL.

    Only declares matching data; the extraction logic is inherited.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
0c148415
S
3244
3245
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; only overrides _VIDEO_RE."""

    # Matches a /watch link, capturing the 11-character video id and,
    # when present on the same tag, its title attribute.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
3248
3249
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for 'ytsearch' keyword queries."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL query arguments; subclasses override this to change sorting.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are followed through their "Next" link until at least
        ``n`` videos have been collected, a page yields no videos, or there
        is no next link.

        Returns a playlist result holding at most ``n`` entries.
        Raises ExtractorError when a page contains a search message instead
        of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as n videos are available: with a strict '>' an
            # extra page was downloaded (and then thrown away) whenever
            # exactly n results had been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf') (see _MAX_RESULTS), which cannot be used as
        # a slice bound, hence the explicit length check.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3298
c9ae7b95 3299
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the 'ytsearchdate' keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Added to the search URL query by the inherited _get_n_results().
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3305
c9ae7b95 3306
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for youtube.com/results search URLs."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    # Captures the JSON object assigned to ytInitialData in the page.
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Collect every dict holding a "videoId" key from nested JSON data.

        Lists and dicts are walked depth-first in their own order. A dict
        that has a "videoId" key is collected as-is and not searched any
        further; all other value types are ignored.
        """
        def _walk(node):
            if type(node) is list:
                for item in node:
                    for found in _walk(item):
                        yield found
            elif type(node) is dict:
                if "videoId" in node:
                    yield node
                else:
                    for value in node.values():
                        for found in _walk(value):
                            yield found

        return list(_walk(extracted))

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Append the videos found in ``page`` to the two parallel lists.

        ``ids_in_page`` and ``titles_in_page`` are modified in place. An id
        already present is not added again; its title is only filled in when
        the stored one is empty. Entries lacking an id or a title are skipped.
        """
        raw_data = self._search_regex(self._SEARCH_DATA, page, 'ytInitialData')
        search_response = self._parse_json(raw_data, None)

        for renderer in self._find_videos_in_json(search_response):
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = (
                try_get(renderer, lambda x: x['title']['runs'][0]['text'])
                or try_get(renderer, lambda x: x['title']['simpleText']))

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            if video_id in ids_in_page:
                position = ids_in_page.index(video_id)
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return the (video id, title) pairs found in ``page``."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(page, found_ids, found_titles)
        return zip(found_ids, found_titles)

    def _real_extract(self, url):
        """Download the search URL and return its videos as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3380
3381
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/<id> URLs."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the inherited extraction on the show's '/playlists' URL."""
        playlist_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % playlist_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3399
3400
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Captures the JSON object assigned to ytInitialData in the feed page.
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # Captures the JSON object passed to ytcfg.set(...) in the feed page.
    _YTCFG_DATA = r"ytcfg.set\(({.*?})\)"

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _find_videos_in_json(self, extracted):
        """Walk nested JSON data for video dicts and continuation data.

        Returns a ``(videos, continuation)`` tuple. ``videos`` lists every
        dict that has a "videoId" key; such dicts are not searched further.
        ``continuation`` is the value of the last "nextContinuationData" key
        met during the walk, or None when there was none.
        """
        videos = []
        # A dict is used so the nested function can store its finding
        # without needing ``nonlocal``.
        c = {}

        def _real_find(obj):
            if obj is None or isinstance(obj, str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                if "videoId" in obj:
                    videos.append(obj)
                    return

                if "nextContinuationData" in obj:
                    c["continuation"] = obj["nextContinuationData"]
                    return

                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return videos, try_get(c, lambda x: x["continuation"])

    def _entries(self, page):
        """Yield a url_result for every video of the feed, following pages.

        The first batch comes from the ytInitialData JSON embedded in
        ``page``; further batches are requested from browse_ajax using the
        continuation data of the previous batch. Iteration stops when a batch
        contributes no new video id, or when either the continuation data or
        the ytcfg configuration is missing.
        """
        # Ids yielded from earlier batches; a set keeps the per-video
        # duplicate check constant-time instead of rescanning a list.
        seen_ids = set()

        yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_info = []
            for v in video_info:
                v_id = try_get(v, lambda x: x['videoId'])
                if v_id and v_id not in seen_ids:
                    new_info.append(v)

            if not new_info:
                break

            # Only register the ids once the whole batch has been filtered,
            # so videos are compared against earlier batches only.
            seen_ids.update(v['videoId'] for v in new_info)

            for video in new_info:
                yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText']))

            if not continuation or not yt_conf:
                break

            continuation_token = try_get(continuation, lambda x: x["continuation"])

            candidate_headers = {
                "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
                "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
                "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
                "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
                "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
                "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
                "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
            }
            # A key missing from ytcfg comes back as None, which is not a
            # usable HTTP header value; send only the headers we have.
            headers = dict(
                (name, value) for name, value in candidate_headers.items()
                if value is not None)

            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query={
                    "ctoken": continuation_token,
                    "continuation": continuation_token,
                    "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
                },
                headers=headers)

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        ``url`` is not used; the feed URL is built from _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3507
3508
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the 'WL' (watch later) playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video selected for ``url``, else the WL playlist.

        The second item returned by _check_download_just_video() is used
        when it is truthy; otherwise the 'WL' playlist is extracted.
        """
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
f459d170 3528
5f6a1245 3529
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list of the logged-in account."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to YoutubePlaylist.

        ``url`` is not used; the my_favorites page is always downloaded and
        the first ``list=`` parameter found in it is taken as the id.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
3540
3541
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/recommended."""

    # Name of the feed; used by the base class to build the feed URL
    # and the extractor name.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3547
1ed5b5c9 3548
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/subscriptions."""

    # The shortcut accepted by _VALID_URL starts with a colon (":ytsubs"),
    # so the description advertises it in that form, in the same wording
    # as the other feed extractors.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Name of the feed; used by the base class to build the feed URL
    # and the extractor name.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3554
1ed5b5c9 3555
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/history."""

    # Name of the feed; used by the base class to build the feed URL
    # and the extractor name.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3561
3562
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video parameter.

    Matching URLs are never extracted; a hint about quoting the URL is
    raised instead.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the problem."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3610
3611
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)