]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
Merge remote-tracking branch 'origin/master'
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
351f37c0 33 extract_attributes,
c5e8d7af 34 ExtractorError,
2d30521a 35 float_or_none,
4bb4a188
PH
36 get_element_by_attribute,
37 get_element_by_id,
dd27fd17 38 int_or_none,
94278f72 39 mimetype2ext,
4bb4a188 40 orderedSet,
6310acf5 41 parse_codecs,
b84071c0 42 parse_count,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Extra headers sent with JSON page requests (see users of this constant)
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (includes hl=en)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list of url_result entries, one per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report failure explicitly, as the docstring promises
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload to url; the non-fatal download yields False on failure.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same continue URL is embedded in both the lookup and the
        # challenge payloads.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Report failure explicitly, as the docstring promises
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so that the prefix is applied to both alternatives
            # ('%' binds tighter than the conditional expression).
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning above
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true
        added to the request query.

        The caller's query mapping is copied, never mutated; a missing or
        None query is treated as empty.
        """
        query = dict(kwargs.get('query') or {})
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing when
        no downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 288
8377574c 289
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is located through the data-uix-load-more-href attribute of the
        current load-more widget.
        """
        max_retries = 3
        load_more_re = r'data-uix-load-more-href="/?(?P<more>[^"]+)"'

        items_html = widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(items_html):
                yield item

            next_match = re.search(load_more_re, widget_html)
            if next_match is None:
                return

            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                retry_suffix = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        'https://www.youtube.com/%s' % next_match.group('more'),
                        playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_suffix),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                except ExtractorError as err:
                    transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    raise
                break

            items_html = more['content_html']
            if not items_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
328
061a75ed
S
329
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield one 'Youtube' url_result per (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists updated in place:
        a new id is appended together with its title, while a repeated id
        only fills in a title that is still missing.
        """
        for mobj in re.finditer(video_re, page):
            named = mobj.groupdict()
            video_id = mobj.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in named and video_id == '0':
                continue

            if 'title' in named:
                video_title = unescapeHTML(named['title'])
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            try:
                position = ids_in_page.index(video_id)
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return the (id, title) pairs matched by self._VIDEO_RE in page."""
        video_ids, video_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, video_ids, video_titles)
        return zip(video_ids, video_titles)
361
362
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist
        link found in a yt-lockup-title heading of content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist result whose
        entries come from self._entries()."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
376
377
360e1ca5 378class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 379 IE_DESC = 'YouTube.com'
cb7dfeea 380 _VALID_URL = r"""(?x)^
c5e8d7af 381 (
edb53e2d 382 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 383 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 384 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 385 (?:www\.)?pwnyoutube\.com/|
8b561bfc 386 (?:www\.)?hooktube\.com/|
f7000f3a 387 (?:www\.)?yourepeat\.com/|
e69ae5b9 388 tube\.majestyc\.net/|
ba036333 389 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 390 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 391 (?:(?:www|no)\.)?invidiou\.sh/|
392 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 393 (?:www\.)?invidious\.kabi\.tk/|
ba036333 394 (?:www\.)?invidious\.13ad\.de/|
791d2e81 395 (?:www\.)?invidious\.mastodon\.host/|
494d664e 396 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 397 (?:www\.)?invidious\.drycat\.fr/|
ba036333 398 (?:www\.)?tube\.poal\.co/|
8ae113ca 399 (?:www\.)?vid\.wxzm\.sx/|
384bf91f 400 (?:www\.)?yewtu\.be/|
494d664e 401 (?:www\.)?yt\.elukerio\.org/|
894b3826 402 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 403 (?:www\.)?invidious\.ggc-project\.de/|
404 (?:www\.)?yt\.maisputain\.ovh/|
405 (?:www\.)?invidious\.13ad\.de/|
406 (?:www\.)?invidious\.toot\.koeln/|
407 (?:www\.)?invidious\.fdn\.fr/|
408 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 409 (?:www\.)?kgg2m7yk5aybusll\.onion/|
410 (?:www\.)?qklhadlycap4cnod\.onion/|
411 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
412 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
413 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
414 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 415 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 416 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 417 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
418 (?:.*?\#/)? # handle anchor (#/) redirect urls
419 (?: # the various things that can precede the ID:
ac7553d0 420 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 421 |(?: # or the v= param in all its forms
f7000f3a 422 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 423 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 424 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
425 v=
426 )
f4b05232 427 ))
cbaed4bb
S
428 |(?:
429 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
430 vid\.plus| # or vid.plus/xxxx
431 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 432 )/
edb53e2d 433 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 434 )
c5e8d7af 435 )? # all until now is optional -> you can pass the naked ID
8963d9c2 436 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
437 (?!.*?\blist=
438 (?:
439 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
440 WL # WL are handled by the watch later IE
441 )
442 )
c5e8d7af 443 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 444 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 445 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
446 _PLAYER_INFO_RE = (
447 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
448 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
449 )
2c62dc26 450 _formats = {
c2d3cb4c 451 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
452 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
453 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
454 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
455 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
456 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
457 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
458 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 459 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 460 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
461 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
462 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
463 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
464 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
465 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 466 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 467 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
468 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 469
470
471 # 3D videos
c2d3cb4c 472 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
473 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
474 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
475 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 476 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
477 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
478 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 479
96fb5605 480 # Apple HTTP Live Streaming
11f12195 481 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 482 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
483 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
484 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
485 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
486 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 487 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
488 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
489
490 # DASH mp4 video
d23028a8
S
491 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 496 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
497 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
498 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
499 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
500 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
501 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
502 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 503
f6f1fc92 504 # Dash mp4 audio
d23028a8
S
505 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
506 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
507 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
508 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
509 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
510 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
511 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
512
513 # Dash webm
d23028a8
S
514 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
518 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
519 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
520 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
521 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
528 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 529 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
530 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
534 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
536
537 # Dash webm audio
d23028a8
S
538 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
539 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 540
0857baad 541 # Dash webm audio with opus inside
d23028a8
S
542 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
543 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
544 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 545
ce6b9a2d
PH
546 # RTMP (unnamed)
547 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
548
549 # av01 video only formats sometimes served with "unknown" codecs
550 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
552 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
553 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 554 }
84da5d84 555 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 556
fd5c4aab
S
557 _GEO_BYPASS = False
558
78caa52a 559 IE_NAME = 'youtube'
2eb88d95
PH
560 _TESTS = [
561 {
2d3d2997 562 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
563 'info_dict': {
564 'id': 'BaW_jenozKc',
565 'ext': 'mp4',
3867038a 566 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
567 'uploader': 'Philipp Hagemeister',
568 'uploader_id': 'phihag',
ec85ded8 569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
570 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
571 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 572 'upload_date': '20121002',
3867038a 573 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 574 'categories': ['Science & Technology'],
3867038a 575 'tags': ['youtube-dl'],
556dbe7f 576 'duration': 10,
dbdaaa23 577 'view_count': int,
3e7c1224
PH
578 'like_count': int,
579 'dislike_count': int,
7c80519c 580 'start_time': 1,
297a564b 581 'end_time': 9,
2eb88d95 582 }
0e853ca4 583 },
0e853ca4 584 {
2d3d2997 585 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
586 'note': 'Test generic use_cipher_signature video (#897)',
587 'info_dict': {
588 'id': 'UxxajLWwzqY',
589 'ext': 'mp4',
590 'upload_date': '20120506',
591 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 592 'alt_title': 'I Love It (feat. Charli XCX)',
5429d6a9 593 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
000b6b5a
S
594 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
595 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
596 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 597 'duration': 180,
4bc3a23e
PH
598 'uploader': 'Icona Pop',
599 'uploader_id': 'IconaPop',
ec85ded8 600 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 601 'creator': 'Icona Pop',
936784b2
S
602 'track': 'I Love It (feat. Charli XCX)',
603 'artist': 'Icona Pop',
2eb88d95 604 }
c108eb73
JMF
605 },
606 {
4bc3a23e
PH
607 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
608 'note': 'Test VEVO video with age protection (#956)',
609 'info_dict': {
610 'id': '07FYdnEawAQ',
611 'ext': 'mp4',
612 'upload_date': '20130703',
4fe54c12 613 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 614 'alt_title': 'Tunnel Vision',
4fe54c12 615 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 616 'duration': 419,
4bc3a23e
PH
617 'uploader': 'justintimberlakeVEVO',
618 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 620 'creator': 'Justin Timberlake',
7e72694b 621 'track': 'Tunnel Vision',
936784b2 622 'artist': 'Justin Timberlake',
34952f09 623 'age_limit': 18,
c108eb73
JMF
624 }
625 },
fccd3771 626 {
4bc3a23e
PH
627 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
628 'note': 'Embed-only video (#1746)',
629 'info_dict': {
630 'id': 'yZIXLfi8CZQ',
631 'ext': 'mp4',
632 'upload_date': '20120608',
633 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
634 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
635 'uploader': 'SET India',
94bfcd23 636 'uploader_id': 'setindia',
ec85ded8 637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 638 'age_limit': 18,
fccd3771
PH
639 }
640 },
11b56058 641 {
2d3d2997 642 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
643 'note': 'Use the first video ID in the URL',
644 'info_dict': {
645 'id': 'BaW_jenozKc',
646 'ext': 'mp4',
3867038a 647 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
648 'uploader': 'Philipp Hagemeister',
649 'uploader_id': 'phihag',
ec85ded8 650 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 651 'upload_date': '20121002',
3867038a 652 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 653 'categories': ['Science & Technology'],
3867038a 654 'tags': ['youtube-dl'],
556dbe7f 655 'duration': 10,
dbdaaa23 656 'view_count': int,
11b56058
PM
657 'like_count': int,
658 'dislike_count': int,
34a7de29
S
659 },
660 'params': {
661 'skip_download': True,
662 },
11b56058 663 },
dd27fd17 664 {
2d3d2997 665 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
666 'note': '256k DASH audio (format 141) via DASH manifest',
667 'info_dict': {
668 'id': 'a9LDPn-MO4I',
669 'ext': 'm4a',
670 'upload_date': '20121002',
671 'uploader_id': '8KVIDEO',
ec85ded8 672 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
673 'description': '',
674 'uploader': '8KVIDEO',
675 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 676 },
4bc3a23e
PH
677 'params': {
678 'youtube_include_dash_manifest': True,
679 'format': '141',
4919603f 680 },
de3c7fe0 681 'skip': 'format 141 not served anymore',
dd27fd17 682 },
3489b7d2
JMF
683 # DASH manifest with encrypted signature
684 {
78caa52a
PH
685 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
686 'info_dict': {
687 'id': 'IB3lcPjvWLA',
688 'ext': 'm4a',
4fe54c12
S
689 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
690 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 691 'duration': 244,
78caa52a
PH
692 'uploader': 'AfrojackVEVO',
693 'uploader_id': 'AfrojackVEVO',
694 'upload_date': '20131011',
3489b7d2 695 },
4bc3a23e 696 'params': {
78caa52a 697 'youtube_include_dash_manifest': True,
de3c7fe0 698 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
699 },
700 },
aaeb86f6
S
701 # JS player signature function name containing $
702 {
703 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
704 'info_dict': {
705 'id': 'nfWlot6h_JM',
706 'ext': 'm4a',
707 'title': 'Taylor Swift - Shake It Off',
5429d6a9 708 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
556dbe7f 709 'duration': 242,
aaeb86f6
S
710 'uploader': 'TaylorSwiftVEVO',
711 'uploader_id': 'TaylorSwiftVEVO',
712 'upload_date': '20140818',
713 },
714 'params': {
715 'youtube_include_dash_manifest': True,
de3c7fe0 716 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
717 },
718 },
aa79ac0c
PH
719 # Controversy video
720 {
721 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
722 'info_dict': {
723 'id': 'T4XJQO3qol8',
724 'ext': 'mp4',
556dbe7f 725 'duration': 219,
aa79ac0c 726 'upload_date': '20100909',
4fe54c12 727 'uploader': 'Amazing Atheist',
aa79ac0c 728 'uploader_id': 'TheAmazingAtheist',
ec85ded8 729 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
730 'title': 'Burning Everyone\'s Koran',
731 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
732 }
c522adb1
JMF
733 },
734 # Normal age-gate video (No vevo, embed allowed)
735 {
2d3d2997 736 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
737 'info_dict': {
738 'id': 'HtVdAasjOgU',
739 'ext': 'mp4',
740 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 741 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 742 'duration': 142,
c522adb1
JMF
743 'uploader': 'The Witcher',
744 'uploader_id': 'WitcherGame',
ec85ded8 745 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 746 'upload_date': '20140605',
34952f09 747 'age_limit': 18,
c522adb1
JMF
748 },
749 },
fccae2b9
S
750 # Age-gate video with encrypted signature
751 {
2d3d2997 752 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
753 'info_dict': {
754 'id': '6kLq3WMV1nU',
4fe54c12 755 'ext': 'mp4',
fccae2b9
S
756 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
757 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 758 'duration': 246,
fccae2b9
S
759 'uploader': 'LloydVEVO',
760 'uploader_id': 'LloydVEVO',
ec85ded8 761 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 762 'upload_date': '20110629',
34952f09 763 'age_limit': 18,
fccae2b9
S
764 },
765 },
067aa17e 766 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 767 # YouTube Red ad is not captured for creator
774e208f
PH
768 {
769 'url': '__2ABJjxzNo',
770 'info_dict': {
771 'id': '__2ABJjxzNo',
772 'ext': 'mp4',
556dbe7f 773 'duration': 266,
774e208f
PH
774 'upload_date': '20100430',
775 'uploader_id': 'deadmau5',
ec85ded8 776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
5429d6a9 777 'creator': 'Dada Life, deadmau5',
774e208f
PH
778 'description': 'md5:12c56784b8032162bb936a5f76d55360',
779 'uploader': 'deadmau5',
780 'title': 'Deadmau5 - Some Chords (HD)',
5429d6a9 781 'alt_title': 'This Machine Kills Some Chords',
774e208f
PH
782 },
783 'expected_warnings': [
784 'DASH manifest missing',
785 ]
e52a40ab 786 },
067aa17e 787 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
788 {
789 'url': 'lqQg6PlCWgI',
790 'info_dict': {
791 'id': 'lqQg6PlCWgI',
792 'ext': 'mp4',
556dbe7f 793 'duration': 6085,
90227264 794 'upload_date': '20150827',
cbe2bd91 795 'uploader_id': 'olympic',
ec85ded8 796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 797 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 798 'uploader': 'Olympic',
cbe2bd91
PH
799 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
800 },
801 'params': {
802 'skip_download': 'requires avconv',
e52a40ab 803 }
cbe2bd91 804 },
6271f1ca
PH
805 # Non-square pixels
806 {
807 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
808 'info_dict': {
809 'id': '_b-2C3KPAM0',
810 'ext': 'mp4',
811 'stretched_ratio': 16 / 9.,
556dbe7f 812 'duration': 85,
6271f1ca
PH
813 'upload_date': '20110310',
814 'uploader_id': 'AllenMeow',
ec85ded8 815 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 816 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 817 'uploader': '孫ᄋᄅ',
6271f1ca
PH
818 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
819 },
06b491eb
S
820 },
821 # url_encoded_fmt_stream_map is empty string
822 {
823 'url': 'qEJwOuvDf7I',
824 'info_dict': {
825 'id': 'qEJwOuvDf7I',
f57b7835 826 'ext': 'webm',
06b491eb
S
827 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
828 'description': '',
829 'upload_date': '20150404',
830 'uploader_id': 'spbelect',
831 'uploader': 'Наблюдатели Петербурга',
832 },
833 'params': {
834 'skip_download': 'requires avconv',
e323cf3f
S
835 },
836 'skip': 'This live event has ended.',
06b491eb 837 },
067aa17e 838 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
839 {
840 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
841 'info_dict': {
842 'id': 'FIl7x6_3R5Y',
eb6793ba 843 'ext': 'webm',
da77d856
S
844 'title': 'md5:7b81415841e02ecd4313668cde88737a',
845 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 846 'duration': 220,
da77d856
S
847 'upload_date': '20150625',
848 'uploader_id': 'dorappi2000',
ec85ded8 849 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 850 'uploader': 'dorappi2000',
eb6793ba 851 'formats': 'mincount:31',
da77d856 852 },
eb6793ba 853 'skip': 'not actual anymore',
2ee8f5d8 854 },
8a1a26ce
YCH
855 # DASH manifest with segment_list
856 {
857 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
858 'md5': '8ce563a1d667b599d21064e982ab9e31',
859 'info_dict': {
860 'id': 'CsmdDsKjzN8',
861 'ext': 'mp4',
17ee98e1 862 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
863 'uploader': 'Airtek',
864 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
865 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
866 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
867 },
868 'params': {
869 'youtube_include_dash_manifest': True,
870 'format': '135', # bestvideo
be49068d
S
871 },
872 'skip': 'This live event has ended.',
2ee8f5d8 873 },
cf7e015f
S
874 {
875 # Multifeed videos (multiple cameras), URL is for Main Camera
876 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
877 'info_dict': {
878 'id': 'jqWvoWXjCVs',
879 'title': 'teamPGP: Rocket League Noob Stream',
880 'description': 'md5:dc7872fb300e143831327f1bae3af010',
881 },
882 'playlist': [{
883 'info_dict': {
884 'id': 'jqWvoWXjCVs',
885 'ext': 'mp4',
886 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
887 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 888 'duration': 7335,
cf7e015f
S
889 'upload_date': '20150721',
890 'uploader': 'Beer Games Beer',
891 'uploader_id': 'beergamesbeer',
ec85ded8 892 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 893 'license': 'Standard YouTube License',
cf7e015f
S
894 },
895 }, {
896 'info_dict': {
897 'id': '6h8e8xoXJzg',
898 'ext': 'mp4',
899 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
900 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 901 'duration': 7337,
cf7e015f
S
902 'upload_date': '20150721',
903 'uploader': 'Beer Games Beer',
904 'uploader_id': 'beergamesbeer',
ec85ded8 905 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 906 'license': 'Standard YouTube License',
cf7e015f
S
907 },
908 }, {
909 'info_dict': {
910 'id': 'PUOgX5z9xZw',
911 'ext': 'mp4',
912 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
913 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 914 'duration': 7337,
cf7e015f
S
915 'upload_date': '20150721',
916 'uploader': 'Beer Games Beer',
917 'uploader_id': 'beergamesbeer',
ec85ded8 918 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 919 'license': 'Standard YouTube License',
cf7e015f
S
920 },
921 }, {
922 'info_dict': {
923 'id': 'teuwxikvS5k',
924 'ext': 'mp4',
925 'title': 'teamPGP: Rocket League Noob Stream (zim)',
926 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 927 'duration': 7334,
cf7e015f
S
928 'upload_date': '20150721',
929 'uploader': 'Beer Games Beer',
930 'uploader_id': 'beergamesbeer',
ec85ded8 931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 932 'license': 'Standard YouTube License',
cf7e015f
S
933 },
934 }],
935 'params': {
936 'skip_download': True,
937 },
4fe54c12 938 'skip': 'This video is not available.',
cbaed4bb 939 },
f9f49d87 940 {
067aa17e 941 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
942 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
943 'info_dict': {
944 'id': 'gVfLd0zydlo',
945 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
946 },
947 'playlist_count': 2,
be49068d 948 'skip': 'Not multifeed anymore',
f9f49d87 949 },
cbaed4bb 950 {
2d3d2997 951 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 952 'only_matching': True,
0e49d9a6 953 },
6d4fc66b 954 {
2d3d2997 955 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
956 'only_matching': True,
957 },
0e49d9a6 958 {
067aa17e 959 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 960 # Also tests cut-off URL expansion in video description (see
067aa17e
S
961 # https://github.com/ytdl-org/youtube-dl/issues/1892,
962 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
963 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
964 'info_dict': {
965 'id': 'lsguqyKfVQg',
966 'ext': 'mp4',
967 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 968 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 969 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 970 'duration': 133,
0e49d9a6
LL
971 'upload_date': '20151119',
972 'uploader_id': 'IronSoulElf',
ec85ded8 973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 974 'uploader': 'IronSoulElf',
eb6793ba
S
975 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
976 'track': 'Dark Walk - Position Music',
977 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 978 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
979 },
980 'params': {
981 'skip_download': True,
982 },
983 },
61f92af1 984 {
067aa17e 985 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
986 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
987 'only_matching': True,
988 },
313dfc45
LL
989 {
990 # Video with yt:stretch=17:0
991 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
992 'info_dict': {
993 'id': 'Q39EVAstoRM',
994 'ext': 'mp4',
995 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
996 'description': 'md5:ee18a25c350637c8faff806845bddee9',
997 'upload_date': '20151107',
998 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
999 'uploader': 'CH GAMER DROID',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
be49068d 1004 'skip': 'This video does not exist.',
313dfc45 1005 },
7caf9830
S
1006 {
1007 # Video licensed under Creative Commons
1008 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1009 'info_dict': {
1010 'id': 'M4gD1WSo5mA',
1011 'ext': 'mp4',
1012 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1013 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1014 'duration': 721,
7caf9830
S
1015 'upload_date': '20150127',
1016 'uploader_id': 'BerkmanCenter',
ec85ded8 1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1018 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 },
1024 },
fd050249
S
1025 {
1026 # Channel-like uploader_url
1027 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1028 'info_dict': {
1029 'id': 'eQcmzGIKrzg',
1030 'ext': 'mp4',
1031 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1032 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1033 'duration': 4060,
fd050249 1034 'upload_date': '20151119',
eb6793ba 1035 'uploader': 'Bernie Sanders',
fd050249 1036 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1038 'license': 'Creative Commons Attribution license (reuse allowed)',
1039 },
1040 'params': {
1041 'skip_download': True,
1042 },
1043 },
040ac686
S
1044 {
1045 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1046 'only_matching': True,
7f29cf54
S
1047 },
1048 {
067aa17e 1049 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1050 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1051 'only_matching': True,
6496ccb4
S
1052 },
1053 {
1054 # Rental video preview
1055 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1056 'info_dict': {
1057 'id': 'uGpuVWrhIzE',
1058 'ext': 'mp4',
1059 'title': 'Piku - Trailer',
1060 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1061 'upload_date': '20150811',
1062 'uploader': 'FlixMatrix',
1063 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1065 'license': 'Standard YouTube License',
1066 },
1067 'params': {
1068 'skip_download': True,
1069 },
eb6793ba 1070 'skip': 'This video is not available.',
022a5d66 1071 },
12afdc2a
S
1072 {
1073 # YouTube Red video with episode data
1074 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1075 'info_dict': {
1076 'id': 'iqKdEhx-dD4',
1077 'ext': 'mp4',
1078 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1079 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1080 'duration': 2085,
12afdc2a
S
1081 'upload_date': '20170118',
1082 'uploader': 'Vsauce',
1083 'uploader_id': 'Vsauce',
1084 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1085 'series': 'Mind Field',
1086 'season_number': 1,
1087 'episode_number': 1,
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
1092 'expected_warnings': [
1093 'Skipping DASH manifest',
1094 ],
1095 },
c7121fa7
S
1096 {
1097 # The following content has been identified by the YouTube community
1098 # as inappropriate or offensive to some audiences.
1099 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1100 'info_dict': {
1101 'id': '6SJNVb0GnPI',
1102 'ext': 'mp4',
1103 'title': 'Race Differences in Intelligence',
1104 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1105 'duration': 965,
1106 'upload_date': '20140124',
1107 'uploader': 'New Century Foundation',
1108 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
1114 },
022a5d66
S
1115 {
1116 # itag 212
1117 'url': '1t24XAntNCY',
1118 'only_matching': True,
fd5c4aab
S
1119 },
1120 {
1121 # geo restricted to JP
1122 'url': 'sJL6WA-aGkQ',
1123 'only_matching': True,
1124 },
d0ba5587
S
1125 {
1126 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1127 'only_matching': True,
1128 },
cd5a74a2
S
1129 {
1130 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1131 'only_matching': True,
1132 },
825cd268
RA
1133 {
1134 # DRM protected
1135 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1136 'only_matching': True,
4fe54c12
S
1137 },
1138 {
1139 # Video with unsupported adaptive stream type formats
1140 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1141 'info_dict': {
1142 'id': 'Z4Vy8R84T1U',
1143 'ext': 'mp4',
1144 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1145 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1146 'duration': 433,
1147 'upload_date': '20130923',
1148 'uploader': 'Amelia Putri Harwita',
1149 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1150 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1151 'formats': 'maxcount:10',
1152 },
1153 'params': {
1154 'skip_download': True,
1155 'youtube_include_dash_manifest': False,
1156 },
5429d6a9 1157 'skip': 'not actual anymore',
5caabd3c 1158 },
1159 {
822b9d9c 1160 # Youtube Music Auto-generated description
5caabd3c 1161 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1162 'info_dict': {
1163 'id': 'MgNrAu2pzNs',
1164 'ext': 'mp4',
1165 'title': 'Voyeur Girl',
1166 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1167 'upload_date': '20190312',
5429d6a9
S
1168 'uploader': 'Stephen - Topic',
1169 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1170 'artist': 'Stephen',
1171 'track': 'Voyeur Girl',
1172 'album': 'it\'s too much love to know my dear',
1173 'release_date': '20190313',
1174 'release_year': 2019,
1175 },
1176 'params': {
1177 'skip_download': True,
1178 },
1179 },
1180 {
822b9d9c 1181 # Youtube Music Auto-generated description
5caabd3c 1182 # Retrieve 'artist' field from 'Artist:' in video description
1183 # when it is present on youtube music video
5caabd3c 1184 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1185 'info_dict': {
1186 'id': 'k0jLE7tTwjY',
1187 'ext': 'mp4',
1188 'title': 'Latch Feat. Sam Smith',
1189 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1190 'upload_date': '20150110',
1191 'uploader': 'Various Artists - Topic',
1192 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1193 'artist': 'Disclosure',
1194 'track': 'Latch Feat. Sam Smith',
1195 'album': 'Latch Featuring Sam Smith',
1196 'release_date': '20121008',
1197 'release_year': 2012,
1198 },
1199 'params': {
1200 'skip_download': True,
1201 },
1202 },
1203 {
822b9d9c 1204 # Youtube Music Auto-generated description
5caabd3c 1205 # handle multiple artists on youtube music video
1206 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1207 'info_dict': {
1208 'id': '74qn0eJSjpA',
1209 'ext': 'mp4',
1210 'title': 'Eastside',
1211 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1212 'upload_date': '20180710',
1213 'uploader': 'Benny Blanco - Topic',
1214 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1215 'artist': 'benny blanco, Halsey, Khalid',
1216 'track': 'Eastside',
1217 'album': 'Eastside',
1218 'release_date': '20180713',
1219 'release_year': 2018,
1220 },
1221 'params': {
1222 'skip_download': True,
1223 },
1224 },
1225 {
822b9d9c 1226 # Youtube Music Auto-generated description
5caabd3c 1227 # handle youtube music video with release_year and no release_date
1228 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1229 'info_dict': {
1230 'id': '-hcAI0g-f5M',
1231 'ext': 'mp4',
1232 'title': 'Put It On Me',
5429d6a9 1233 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
5caabd3c 1234 'upload_date': '20180426',
1235 'uploader': 'Matt Maeson - Topic',
1236 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1237 'artist': 'Matt Maeson',
1238 'track': 'Put It On Me',
1239 'album': 'The Hearse',
1240 'release_date': None,
1241 'release_year': 2018,
1242 },
1243 'params': {
1244 'skip_download': True,
1245 },
1246 },
66b48727
RA
1247 {
1248 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1249 'only_matching': True,
1250 },
011e75e6
S
1251 {
1252 # invalid -> valid video id redirection
1253 'url': 'DJztXj2GPfl',
1254 'info_dict': {
1255 'id': 'DJztXj2GPfk',
1256 'ext': 'mp4',
1257 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1258 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1259 'upload_date': '20090125',
1260 'uploader': 'Prochorowka',
1261 'uploader_id': 'Prochorowka',
1262 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1263 'artist': 'Panjabi MC',
1264 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1265 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1266 },
1267 'params': {
1268 'skip_download': True,
1269 },
ea74e00b
DP
1270 },
1271 {
1272 # empty description results in an empty string
1273 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1274 'info_dict': {
1275 'id': 'x41yOUIvK2k',
1276 'ext': 'mp4',
1277 'title': 'IMG 3456',
1278 'description': '',
1279 'upload_date': '20170613',
1280 'uploader_id': 'ElevageOrVert',
1281 'uploader': 'ElevageOrVert',
1282 },
1283 'params': {
1284 'skip_download': True,
1285 },
1286 },
2eb88d95
PH
1287 ]
1288
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and its in-memory player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions already built during this run; _decrypt_signature
    # stores them under (player URL, signature layout) keys.
    self._player_cache = dict()
e0df6211 1292
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage of video_id is being fetched."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1296
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that information extraction for video_id has started."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1300
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1304
def report_rtmp_download(self):
    """Tell the user that the download will go through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1308
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the layout of a signature as dot-joined part lengths.

    The signature is split on '.' and every part is replaced by its length,
    so a signature shaped like 'abc.de' is described as '3.2'.
    """
    part_lengths = [len(chunk) for chunk in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1312
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the (ext, id) pair identifying the player behind player_url.

    The patterns in cls._PLAYER_INFO_RE are tried in order and the first one
    that matches supplies the named groups 'ext' and 'id'.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (
        re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # Lazily evaluated, so searching stops at the first successful pattern
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('ext'), id_m.group('id')
1322
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The result is looked up in the downloader cache first (section
    'youtube-sigfuncs'). A cached entry is a list of indices: the deciphered
    signature is built by picking those positions from the input string.
    On a cache miss the player is downloaded, parsed according to its type
    ('js' or 'swf'), and the resulting index list is stored in the cache.

    video_id is only used for the download calls; example_sig determines the
    signature layout the function is built and cached for.

    Raises ExtractorError if the computed cache id contains path components
    or if the player type is neither 'js' nor 'swf'.
    """
    player_type, player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id must be a bare name without directory parts. This is an explicit
    # check rather than an assert so that it still runs under "python -O".
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature function id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Previously "assert False", which is stripped under -O and would
        # then fail later with an unbound "res".
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a string whose characters are their own positions;
    # the code points of the output then reveal which input index ends up at
    # each output position.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1362
def _print_sig_code(self, func, example_sig):
    """Print Python source code equivalent to the signature function func.

    func is applied to a probe string whose characters are their own
    positions, which yields the list of input indices making up the output.
    That list is rendered as a sum of index and slice expressions on "s" and
    shown through self.to_screen, guarded by a check on the part lengths of
    example_sig.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs and
        # never binds "i", which used to end in an UnboundLocalError.
        if len(idxs) < 2:
            for single in idxs:
                yield 's[%d]' % single
            return

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run of consecutive indices
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A run of ascending or descending neighbours starts here
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # An empty index list would otherwise produce a bare "return "
    expr_code = ' + '.join(gen_sig_code(cache_spec)) or "''"
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1401
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the source of a JavaScript player.

    The name of the player's signature function is located with a list of
    regular expressions (tried in order), the function is extracted with
    JSInterpreter, and a callable taking the signature string is returned.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the exact
         # duplicate could never match anything new and was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with its arguments packed in a list
    return lambda s: initial_function([s])
1422
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a Flash player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped in a callable taking the signature string.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        # The extracted function is called with its arguments packed in a list
        return decipher([s])

    return run_decipher
1429
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    The signature function for the (player URL, signature layout) pair is
    built once and kept in self._player_cache. When the downloader option
    'youtube_print_sig_code' is set, equivalent source code is printed too.
    age_gate is accepted but not used here.

    Raises ExtractorError if player_url is None or if anything fails while
    obtaining or applying the signature function; in the latter case the
    formatted traceback is part of the message.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn protocol-relative and site-relative player URLs into absolute ones
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
e0df6211 1456
def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
    """Collect the subtitle tracks listed for a video.

    The track list is downloaded from the timedtext endpoint. For each
    language (first listed track only) one entry per format in
    self._SUBTITLE_FORMATS is generated. When has_live_chat_replay is true a
    'live_chat' entry is added as well. webpage is not used here.

    Returns a dict mapping language codes to lists of format dicts, or an
    empty dict (after a warning) if the track list cannot be downloaded or
    nothing was found.
    """
    try:
        track_list = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    def build_formats(track, lang):
        # One download URL per supported subtitle format
        return [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        if lang not in subtitles:
            subtitles[lang] = build_formats(track, lang)

    if has_live_chat_replay:
        subtitles['live_chat'] = [
            {
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat_replay',
            },
        ]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1496
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config object embedded in webpage.

    Returns the result of the non-fatal JSON parse, or None when no config
    assignment is found in the page.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. Yet working around by trying more
    # concrete regex first keeping in mind proper quoted string handling
    # to be implemented in future that will replace this workaround (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    raw_config = self._search_regex(
        (r';ytplayer\.config\s*=\s*({.+?});ytplayer',
         r';ytplayer\.config\s*=\s*({.+?});'),
        webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1514
def _get_yt_initial_data(self, video_id, webpage):
    """Extract and parse the ``ytInitialData`` object from a watch page.

    Returns the result of parsing the captured JSON (parsing is requested
    as non-fatal), or None when neither assignment form is found in
    *webpage*.
    """
    # The two assignment forms are tried in this order.
    initial_data_res = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_data = self._search_regex(
        initial_data_res, webpage, 'ytInitialData', default=None)
    if not raw_data:
        return None
    return self._parse_json(
        uppercase_escape(raw_data), video_id, fatal=False)
1523
def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic captions available for a video.

    The watch page is passed in because the captions URL is read from its
    player config; reusing it avoids downloading the page again.

    Returns a dict mapping a language code to a list of
    ``{'url': ..., 'ext': ...}`` dicts, one per entry of
    ``self._SUBTITLE_FORMATS``. An empty dict is returned (after a
    warning) when no automatic captions can be located.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}

    def make_captions(track_url, languages):
        # Re-target a single caption track URL at every language/format
        # pair by overriding its 'tlang' and 'fmt' query parameters.
        url_parts = compat_urllib_parse_urlparse(track_url)
        query = compat_parse_qs(url_parts.query)
        result = {}
        for language in languages:
            variants = []
            for ext in self._SUBTITLE_FORMATS:
                query.update({
                    'tlang': [language],
                    'fmt': [ext],
                })
                variants.append({
                    'url': compat_urlparse.urlunparse(url_parts._replace(
                        query=compat_urllib_parse_urlencode(query, True))),
                    'ext': ext,
                })
            result[language] = variants
        return result

    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # Fetch the list of available tracks and translation targets
            track_list_query = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            track_list = self._download_xml(
                tts_url + '&' + track_list_query, video_id)
            source_track = track_list.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            source_lang = source_track.attrib['lang_code']
            source_kind = source_track.attrib.get('kind', '')

            captions = {}
            for target in track_list.findall('target'):
                target_lang = target.attrib['lang_code']
                captions[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse_urlencode({
                        'lang': source_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': source_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                for track in renderer['captionTracks']:
                    if 'kind' not in track:
                        # not an automatic transcription
                        continue
                    track_url = track['baseUrl']
                    lang_codes = [
                        code for code in (
                            lang.get('languageCode')
                            for lang in renderer['translationLanguages'])
                        if code]
                    # Only the first automatic track is used
                    return make_captions(track_url, lang_codes)

                self._downloader.report_warning(err_msg)
                return {}

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        translation_languages = args['caption_translation_languages']
        first_track_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        lang_codes = []
        for packed_lang in translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(packed_lang))
            code = lang_qs.get('lc', [None])[0]
            if code:
                lang_codes.append(code)
        return make_captions(first_track_url, lang_codes)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1632
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the video's stats playback URL to mark it as watched.

    The base URL is taken from *player_response* when present there and
    from *video_info* otherwise; nothing is done when neither provides a
    usable URL. The request itself is non-fatal.
    """
    base_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not base_url:
        base_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(base_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    query.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1658
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in an arbitrary web page.

    Returns a list, in discovery order, made of:
      * HTML-unescaped player URLs referenced from iframe / embed / object
        markup, ``data-video-url`` attributes and SWFObject calls;
      * HTML-unescaped values of lazyYT ``data-youtube-id`` attributes;
      * values of the ``data-video_id`` attribute of the Wordpress
        "YouTube Video Importer" plugin player ``<div>``.
    """
    # Embedded YouTube player.
    # The embedSWF alternative used to be spelled ``embedSWF\(?:\s*``,
    # which demands a literal ':' and therefore never matched the usual
    # ``swfobject.embedSWF("url", ...)`` call. The call form is now
    # accepted; the old spelling is kept as a second alternative so that
    # nothing that matched before stops matching.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(\s*|\(?:\s*)|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the video id is the last group.
    entries.extend(m[-1] for m in matches)

    return entries
1690
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1695
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID matched by ``cls._VALID_URL`` in *url*.

    Raises ExtractorError when *url* does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        # The video ID is the second capturing group of the pattern.
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1703
84213ea8
S
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build the chapter list from the page's ``ytInitialData`` JSON.

    Returns a list of ``{'start_time', 'end_time', 'title'}`` dicts
    (times in seconds), or None when *webpage* is empty, the initial data
    cannot be obtained, or it carries no chapter list. Each chapter ends
    where the next one starts; the last one ends at *duration*. Chapters
    whose start or end time cannot be determined are left out.
    """
    if not webpage:
        return
    initial_data = self._parse_json(
        self._search_regex(
            r'window\["ytInitialData"\] = (.+);\n', webpage,
            'player args', default='{}'),
        video_id, fatal=False)
    if not initial_data or not isinstance(initial_data, dict):
        # The pattern above only knows one assignment form. Fall back to
        # the shared helper, which also recognizes the
        # ``var ytInitialData = ...`` form.
        initial_data = self._get_yt_initial_data(video_id, webpage)
    if not initial_data or not isinstance(initial_data, dict):
        return
    chapters_list = try_get(
        initial_data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset of a chapter, converted from milliseconds to seconds.
        return float_or_none(
            try_get(
                chapter,
                lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                int),
            scale=1000)

    chapters = []
    # next_num is the index of the chapter that follows the current one.
    for next_num, chapter in enumerate(chapters_list, start=1):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        end_time = (chapter_time(chapters_list[next_num])
                    if next_num < len(chapters_list) else duration)
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1752
@staticmethod
def _extract_chapters_from_description(description, duration):
    """Build the chapter list from timestamp links in an HTML description.

    A chapter line is a ``<br />``-delimited description line containing a
    ``yt.www.watch.player.seekTo`` link whose text is a time point.

    Returns a list of ``{'start_time', 'end_time', 'title'}`` dicts, or
    None when *description* is empty or has no chapter lines. Each chapter
    ends where the next one starts; the last one ends at *duration*.

    *duration* may be None. In that case no clamping against the video
    length is done and the last chapter, whose end is then unknown, is
    left out, rather than raising TypeError on the comparisons.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    duration_known = duration is not None
    chapters = []
    # next_num is the index of the line that follows the current one.
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        if duration_known and start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration_known and end_time > duration:
            end_time = duration
        if start_time > end_time:
            break
        # The title is the line without its timestamp link, trimmed of
        # surrounding separators and with whitespace runs collapsed.
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1787
84213ea8
S
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return chapters from the page JSON, else from the description.

    The description-based extraction is only attempted when the JSON-based
    one yields nothing (None or an empty list).
    """
    chapters = self._extract_chapters_from_json(webpage, video_id, duration)
    if chapters:
        return chapters
    return self._extract_chapters_from_description(description, duration)
1791
c5e8d7af 1792 def _real_extract(self, url):
cf7e015f
S
1793 url, smuggled_data = unsmuggle_url(url, {})
1794
7e8c0af0 1795 proto = (
78caa52a
PH
1796 'http' if self._downloader.params.get('prefer_insecure', False)
1797 else 'https')
7e8c0af0 1798
7c80519c 1799 start_time = None
297a564b 1800 end_time = None
7c80519c
JMF
1801 parsed_url = compat_urllib_parse_urlparse(url)
1802 for component in [parsed_url.fragment, parsed_url.query]:
1803 query = compat_parse_qs(component)
297a564b 1804 if start_time is None and 't' in query:
7c80519c 1805 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1806 if start_time is None and 'start' in query:
1807 start_time = parse_duration(query['start'][0])
297a564b
JMF
1808 if end_time is None and 'end' in query:
1809 end_time = parse_duration(query['end'][0])
7c80519c 1810
c5e8d7af
PH
1811 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1812 mobj = re.search(self._NEXT_URL_RE, url)
1813 if mobj:
7fd002c0 1814 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1815 video_id = self.extract_id(url)
c5e8d7af
PH
1816
1817 # Get video webpage
aa79ac0c 1818 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1819 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1820
1821 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1822 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1823
1824 # Attempt to extract SWF player URL
e0df6211 1825 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1826 if mobj is not None:
1827 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1828 else:
1829 player_url = None
1830
d8d24a92
S
1831 dash_mpds = []
1832
1833 def add_dash_mpd(video_info):
1834 dash_mpd = video_info.get('dashmpd')
1835 if dash_mpd and dash_mpd[0] not in dash_mpds:
1836 dash_mpds.append(dash_mpd[0])
1837
561b456e
S
1838 def add_dash_mpd_pr(pl_response):
1839 dash_mpd = url_or_none(try_get(
1840 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1841 compat_str))
1842 if dash_mpd and dash_mpd not in dash_mpds:
1843 dash_mpds.append(dash_mpd)
1844
c7121fa7
S
1845 is_live = None
1846 view_count = None
1847
1848 def extract_view_count(v_info):
1849 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1850
c2d125d9
S
1851 def extract_player_response(player_response, video_id):
1852 pl_response = str_or_none(player_response)
1853 if not pl_response:
1854 return
1855 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1856 if isinstance(pl_response, dict):
1857 add_dash_mpd_pr(pl_response)
1858 return pl_response
1859
dbdaaa23
S
1860 player_response = {}
1861
c5e8d7af 1862 # Get video info
43ebf77d 1863 video_info = {}
6449cd80 1864 embed_webpage = None
39e7107d
U
1865 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1866 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1867 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1868 age_gate = True
1869 # We simulate the access to the video from www.youtube.com/v/{video_id}
1870 # this can be viewed without login into Youtube
beb95e77
CL
1871 url = proto + '://www.youtube.com/embed/%s' % video_id
1872 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
9d9314cb
U
1873 # check if video is only playable on youtube - if so it requires auth (cookies)
1874 if re.search(r'player-unavailable">', embed_webpage) is not None:
c73baf23
U
1875 '''
1876 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1877 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1878 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1879 '''
1880 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1881 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1882 age_gate = False
1883 # Try looking directly into the video webpage
1884 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1885 if ytplayer_config:
1886 args = ytplayer_config['args']
1887 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1888 # Convert to the same format returned by compat_parse_qs
1889 video_info = dict((k, [v]) for k, v in args.items())
1890 add_dash_mpd(video_info)
1891 # Rental video is not rented but preview is available (e.g.
1892 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1893 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1894 if not video_info and args.get('ypc_vid'):
1895 return self.url_result(
1896 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1897 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1898 is_live = True
1899 if not player_response:
1900 player_response = extract_player_response(args.get('player_response'), video_id)
1901 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1902 add_dash_mpd_pr(player_response)
9d9314cb
U
1903 else:
1904 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1905 else:
1906 data = compat_urllib_parse_urlencode({
1907 'video_id': video_id,
1908 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1909 'sts': self._search_regex(
1910 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1911 })
1912 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1913 try:
1914 video_info_webpage = self._download_webpage(
1915 video_info_url, video_id,
1916 note='Refetching age-gated info webpage',
1917 errnote='unable to download video info webpage')
1918 except ExtractorError:
1919 video_info_webpage = None
1920 if video_info_webpage:
1921 video_info = compat_parse_qs(video_info_webpage)
1922 pl_response = video_info.get('player_response', [None])[0]
1923 player_response = extract_player_response(pl_response, video_id)
1924 add_dash_mpd(video_info)
1925 view_count = extract_view_count(video_info)
c108eb73
JMF
1926 else:
1927 age_gate = False
d8d24a92 1928 # Try looking directly into the video webpage
a72778d3
S
1929 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1930 if ytplayer_config:
4e62ebe2 1931 args = ytplayer_config['args']
4c76aa06 1932 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1933 # Convert to the same format returned by compat_parse_qs
1934 video_info = dict((k, [v]) for k, v in args.items())
1935 add_dash_mpd(video_info)
6496ccb4
S
1936 # Rental video is not rented but preview is available (e.g.
1937 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1938 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1939 if not video_info and args.get('ypc_vid'):
1940 return self.url_result(
1941 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1942 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1943 is_live = True
dbdaaa23 1944 if not player_response:
c2d125d9 1945 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1946 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1947 add_dash_mpd_pr(player_response)
bbb7c3f7
YCH
1948
1949 def extract_unavailable_message():
0add33ab
S
1950 messages = []
1951 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1952 msg = self._html_search_regex(
1953 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1954 video_webpage, 'unavailable %s' % kind, default=None)
1955 if msg:
1956 messages.append(msg)
1957 if messages:
1958 return '\n'.join(messages)
bbb7c3f7 1959
f93abcf1 1960 if not video_info and not player_response:
15be3eb5
RA
1961 unavailable_message = extract_unavailable_message()
1962 if not unavailable_message:
1963 unavailable_message = 'Unable to extract video data'
1964 raise ExtractorError(
1965 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1966
f93abcf1
S
1967 if not isinstance(video_info, dict):
1968 video_info = {}
1969
dbdaaa23
S
1970 video_details = try_get(
1971 player_response, lambda x: x['videoDetails'], dict) or {}
1972
37357d21
S
1973 microformat = try_get(
1974 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1975
8dbf751a
RA
1976 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1977 if not video_title:
cf7e015f
S
1978 self._downloader.report_warning('Unable to extract video title')
1979 video_title = '_'
1980
9cafc3fd 1981 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1982 if video_description:
fa4bc6e7
RA
1983
1984 def replace_url(m):
1985 redir_url = compat_urlparse.urljoin(url, m.group(1))
1986 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1987 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1988 qs = compat_parse_qs(parsed_redir_url.query)
1989 q = qs.get('q')
1990 if q and q[0]:
1991 return q[0]
1992 return redir_url
1993
9cafc3fd 1994 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1995 <a\s+
25cb7a0e 1996 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1997 (?:title|href)="([^"]+)"\s+
25cb7a0e 1998 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1999 class="[^"]*"[^>]*>
23f13e97 2000 [^<]+\.{3}\s*
cf7e015f 2001 </a>
fa4bc6e7 2002 ''', replace_url, video_description)
cf7e015f
S
2003 video_description = clean_html(video_description)
2004 else:
ea74e00b
DP
2005 video_description = video_details.get('shortDescription')
2006 if video_description is None:
2007 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 2008
8fe10494 2009 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 2010 if not self._downloader.params.get('noplaylist'):
8fe10494
S
2011 multifeed_metadata_list = try_get(
2012 player_response,
2013 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2014 compat_str) or try_get(
2015 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2016 if multifeed_metadata_list:
2017 entries = []
2018 feed_ids = []
2019 for feed in multifeed_metadata_list.split(','):
2020 # Unquote should take place before split on comma (,) since textual
2021 # fields may contain comma as well (see
067aa17e 2022 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 2023 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2024
2025 def feed_entry(name):
2026 return try_get(feed_data, lambda x: x[name][0], compat_str)
2027
2028 feed_id = feed_entry('id')
2029 if not feed_id:
2030 continue
2031 feed_title = feed_entry('title')
2032 title = video_title
2033 if feed_title:
2034 title += ' (%s)' % feed_title
8fe10494
S
2035 entries.append({
2036 '_type': 'url_transparent',
2037 'ie_key': 'Youtube',
2038 'url': smuggle_url(
2039 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2040 {'force_singlefeed': True}),
6b09401b 2041 'title': title,
8fe10494 2042 })
6b09401b 2043 feed_ids.append(feed_id)
8fe10494
S
2044 self.to_screen(
2045 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2046 % (', '.join(feed_ids), video_id))
2047 return self.playlist_result(entries, video_id, video_title, video_description)
2048 else:
2049 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2050
c7121fa7 2051 if view_count is None:
1c9c8de2 2052 view_count = extract_view_count(video_info)
dbdaaa23
S
2053 if view_count is None and video_details:
2054 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
2055 if view_count is None and microformat:
2056 view_count = int_or_none(microformat.get('viewCount'))
1d699755 2057
27019dbb 2058 if is_live is None:
898238e9 2059 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 2060
321bf820 2061 has_live_chat_replay = False
f0f76a33 2062 if not is_live:
321bf820 2063 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2064 try:
2065 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2066 has_live_chat_replay = True
f0f76a33 2067 except (KeyError, IndexError, TypeError):
321bf820 2068 pass
2069
c5e8d7af
PH
2070 # Check for "rental" videos
2071 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 2072 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 2073
c63ca0ee
S
2074 def _extract_filesize(media_url):
2075 return int_or_none(self._search_regex(
2076 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2077
bf1317d2
S
2078 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2079 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2080
c5e8d7af
PH
2081 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2082 self.report_rtmp_download()
dd27fd17
PH
2083 formats = [{
2084 'format_id': '_rtmp',
2085 'protocol': 'rtmp',
2086 'url': video_info['conn'][0],
2087 'player_url': player_url,
2088 }]
bf1317d2 2089 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 2090 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 2091 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 2092 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 2093 formats = []
3318832e 2094 formats_spec = {}
82156fdb 2095 fmt_list = video_info.get('fmt_list', [''])[0]
2096 if fmt_list:
2097 for fmt in fmt_list.split(','):
2098 spec = fmt.split('/')
3318832e 2099 if len(spec) > 1:
2100 width_height = spec[1].split('x')
2101 if len(width_height) == 2:
2102 formats_spec[spec[0]] = {
2103 'resolution': spec[1],
2104 'width': int_or_none(width_height[0]),
2105 'height': int_or_none(width_height[1]),
2106 }
bf1317d2
S
2107 for fmt in streaming_formats:
2108 itag = str_or_none(fmt.get('itag'))
2109 if not itag:
201e9eaa 2110 continue
bf1317d2
S
2111 quality = fmt.get('quality')
2112 quality_label = fmt.get('qualityLabel') or quality
2113 formats_spec[itag] = {
2114 'asr': int_or_none(fmt.get('audioSampleRate')),
2115 'filesize': int_or_none(fmt.get('contentLength')),
2116 'format_note': quality_label,
2117 'fps': int_or_none(fmt.get('fps')),
2118 'height': int_or_none(fmt.get('height')),
bf1317d2
S
2119 # bitrate for itag 43 is always 2147483647
2120 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2121 'width': int_or_none(fmt.get('width')),
2122 }
2123
2124 for fmt in streaming_formats:
00eb865b 2125 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2126 continue
2127 url = url_or_none(fmt.get('url'))
2128
2129 if not url:
fa3db383 2130 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2131 if not cipher:
2132 continue
2133 url_data = compat_parse_qs(cipher)
2134 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2135 if not url:
2136 continue
2137 else:
2138 cipher = None
2139 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2140
2f483bc1
S
2141 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2142 # Unsupported FORMAT_STREAM_TYPE_OTF
2143 if stream_type == 3:
2144 continue
6449cd80 2145
bf1317d2
S
2146 format_id = fmt.get('itag') or url_data['itag'][0]
2147 if not format_id:
2148 continue
2149 format_id = compat_str(format_id)
a49eccdf 2150
bf1317d2
S
2151 if cipher:
2152 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2153 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2154 jsplayer_url_json = self._search_regex(
2155 ASSETS_RE,
2156 embed_webpage if age_gate else video_webpage,
2157 'JS player URL (1)', default=None)
2158 if not jsplayer_url_json and not age_gate:
2159 # We need the embed website after all
2160 if embed_webpage is None:
2161 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2162 embed_webpage = self._download_webpage(
2163 embed_url, video_id, 'Downloading embed webpage')
2164 jsplayer_url_json = self._search_regex(
2165 ASSETS_RE, embed_webpage, 'JS player URL')
2166
2167 player_url = json.loads(jsplayer_url_json)
cf010131 2168 if player_url is None:
bf1317d2
S
2169 player_url_json = self._search_regex(
2170 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2171 video_webpage, 'age gate player URL')
2172 player_url = json.loads(player_url_json)
2173
2174 if 'sig' in url_data:
2175 url += '&signature=' + url_data['sig'][0]
2176 elif 's' in url_data:
2177 encrypted_sig = url_data['s'][0]
2178
2179 if self._downloader.params.get('verbose'):
2180 if player_url is None:
bf1317d2 2181 player_desc = 'unknown'
cf010131 2182 else:
e40c758c
S
2183 player_type, player_version = self._extract_player_info(player_url)
2184 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2185 parts_sizes = self._signature_cache_id(encrypted_sig)
2186 self.to_screen('{%s} signature length %s, %s' %
2187 (format_id, parts_sizes, player_desc))
2188
2189 signature = self._decrypt_signature(
2190 encrypted_sig, video_id, player_url, age_gate)
2191 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2192 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2193 if 'ratebypass' not in url:
2194 url += '&ratebypass=yes'
c9afb51c 2195
94278f72
YCH
2196 dct = {
2197 'format_id': format_id,
2198 'url': url,
2199 'player_url': player_url,
2200 }
2201 if format_id in self._formats:
2202 dct.update(self._formats[format_id])
3318832e 2203 if format_id in formats_spec:
2204 dct.update(formats_spec[format_id])
94278f72 2205
aabc2be6 2206 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2207 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2208 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2209 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2210 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2211
bf1317d2
S
2212 if width is None:
2213 width = int_or_none(fmt.get('width'))
2214 if height is None:
2215 height = int_or_none(fmt.get('height'))
2216
c63ca0ee
S
2217 filesize = int_or_none(url_data.get(
2218 'clen', [None])[0]) or _extract_filesize(url)
2219
bf1317d2
S
2220 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2221 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2222
4878759f
S
2223 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2224 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2225 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2226
94278f72 2227 more_fields = {
c63ca0ee 2228 'filesize': filesize,
bf1317d2 2229 'tbr': tbr,
c9afb51c
AH
2230 'width': width,
2231 'height': height,
bf1317d2
S
2232 'fps': fps,
2233 'format_note': quality_label or quality,
c9afb51c 2234 }
94278f72
YCH
2235 for key, value in more_fields.items():
2236 if value:
2237 dct[key] = value
bf1317d2 2238 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2239 if type_:
2240 type_split = type_.split(';')
2241 kind_ext = type_split[0].split('/')
2242 if len(kind_ext) == 2:
94278f72
YCH
2243 kind, _ = kind_ext
2244 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2245 if kind in ('audio', 'video'):
2246 codecs = None
2247 for mobj in re.finditer(
2248 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2249 if mobj.group('key') == 'codecs':
2250 codecs = mobj.group('val')
2251 break
2252 if codecs:
6310acf5 2253 dct.update(parse_codecs(codecs))
e4a60912
S
2254 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2255 dct['downloader_options'] = {
2256 # Youtube throttles chunks >~10M
2257 'http_chunk_size': 10485760,
2258 }
aabc2be6 2259 formats.append(dct)
c5e8d7af 2260 else:
c3e54389
S
2261 manifest_url = (
2262 url_or_none(try_get(
2263 player_response,
2264 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2265 compat_str))
2266 or url_or_none(try_get(
c3e54389
S
2267 video_info, lambda x: x['hlsvp'][0], compat_str)))
2268 if manifest_url:
2269 formats = []
2270 m3u8_formats = self._extract_m3u8_formats(
2271 manifest_url, video_id, 'mp4', fatal=False)
2272 for a_format in m3u8_formats:
2273 itag = self._search_regex(
2274 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2275 if itag:
2276 a_format['format_id'] = itag
2277 if itag in self._formats:
2278 dct = self._formats[itag].copy()
2279 dct.update(a_format)
2280 a_format = dct
2281 a_format['player_url'] = player_url
2282 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2283 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2284 if self._downloader.params.get('youtube_include_hls_manifest', True):
2285 formats.append(a_format)
c3e54389 2286 else:
13577349 2287 error_message = extract_unavailable_message()
c3e54389 2288 if not error_message:
13577349
S
2289 error_message = clean_html(try_get(
2290 player_response, lambda x: x['playabilityStatus']['reason'],
2291 compat_str))
2292 if not error_message:
2293 error_message = clean_html(
2294 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2295 if error_message:
2296 raise ExtractorError(error_message, expected=True)
2297 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2298
7e72694b 2299 # uploader
dbdaaa23
S
2300 video_uploader = try_get(
2301 video_info, lambda x: x['author'][0],
2302 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2303 if video_uploader:
2304 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2305 else:
2306 self._downloader.report_warning('unable to extract uploader name')
2307
2308 # uploader_id
2309 video_uploader_id = None
2310 video_uploader_url = None
2311 mobj = re.search(
2312 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2313 video_webpage)
2314 if mobj is not None:
2315 video_uploader_id = mobj.group('uploader_id')
2316 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2317 else:
2318 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2319 if owner_profile_url:
2320 video_uploader_id = self._search_regex(
2321 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2322 default=None)
2323 video_uploader_url = owner_profile_url
7e72694b 2324
b45a9e69 2325 channel_id = (
3089bc74
S
2326 str_or_none(video_details.get('channelId'))
2327 or self._html_search_meta(
2328 'channelId', video_webpage, 'channel id', default=None)
2329 or self._search_regex(
b45a9e69 2330 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2331 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2332 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2333
b477fc13
S
2334 thumbnails = []
2335 thumbnails_list = try_get(
2336 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2337 for t in thumbnails_list:
2338 if not isinstance(t, dict):
2339 continue
2340 thumbnail_url = url_or_none(t.get('url'))
2341 if not thumbnail_url:
2342 continue
2343 thumbnails.append({
2344 'url': thumbnail_url,
2345 'width': int_or_none(t.get('width')),
2346 'height': int_or_none(t.get('height')),
2347 })
2348
2349 if not thumbnails:
7e72694b 2350 video_thumbnail = None
b477fc13
S
2351 # We try first to get a high quality image:
2352 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2353 video_webpage, re.DOTALL)
2354 if m_thumb is not None:
2355 video_thumbnail = m_thumb.group(1)
2356 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2357 if thumbnail_url:
2358 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2359 if video_thumbnail:
2360 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2361
2362 # upload date
2363 upload_date = self._html_search_meta(
2364 'datePublished', video_webpage, 'upload date', default=None)
2365 if not upload_date:
2366 upload_date = self._search_regex(
2367 [r'(?s)id="eow-date.*?>(.*?)</span>',
2368 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2369 video_webpage, 'upload date', default=None)
37357d21
S
2370 if not upload_date:
2371 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2372 upload_date = unified_strdate(upload_date)
2373
2374 video_license = self._html_search_regex(
2375 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2376 video_webpage, 'license', default=None)
2377
2378 m_music = re.search(
2379 r'''(?x)
2380 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2381 <ul[^>]*>\s*
2382 <li>(?P<title>.+?)
2383 by (?P<creator>.+?)
2384 (?:
2385 \(.+?\)|
2386 <a[^>]*
2387 (?:
2388 \bhref=["\']/red[^>]*>| # drop possible
2389 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2390 )
2391 .*?
2392 )?</li
2393 ''',
2394 video_webpage)
2395 if m_music:
2396 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2397 video_creator = clean_html(m_music.group('creator'))
2398 else:
2399 video_alt_title = video_creator = None
2400
2401 def extract_meta(field):
2402 return self._html_search_regex(
2403 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2404 video_webpage, field, default=None)
2405
2406 track = extract_meta('Song')
2407 artist = extract_meta('Artist')
92bc97d3 2408 album = extract_meta('Album')
822b9d9c
RA
2409
2410 # Youtube Music Auto-generated description
92bc97d3 2411 release_date = release_year = None
822b9d9c
RA
2412 if video_description:
2413 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2414 if mobj:
2415 if not track:
2416 track = mobj.group('track').strip()
2417 if not artist:
2418 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2419 if not album:
2420 album = mobj.group('album'.strip())
822b9d9c
RA
2421 release_year = mobj.group('release_year')
2422 release_date = mobj.group('release_date')
2423 if release_date:
2424 release_date = release_date.replace('-', '')
2425 if not release_year:
2426 release_year = int(release_date[:4])
2427 if release_year:
2428 release_year = int(release_year)
7e72694b
S
2429
2430 m_episode = re.search(
2431 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2432 video_webpage)
2433 if m_episode:
c2dd2dc0 2434 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2435 season_number = int(m_episode.group('season'))
2436 episode_number = int(m_episode.group('episode'))
2437 else:
2438 series = season_number = episode_number = None
2439
2440 m_cat_container = self._search_regex(
2441 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2442 video_webpage, 'categories', default=None)
dbeafce5 2443 category = None
7e72694b
S
2444 if m_cat_container:
2445 category = self._html_search_regex(
2446 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2447 default=None)
dbeafce5
S
2448 if not category:
2449 category = try_get(
2450 microformat, lambda x: x['category'], compat_str)
2451 video_categories = None if category is None else [category]
7e72694b
S
2452
2453 video_tags = [
2454 unescapeHTML(m.group('content'))
2455 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2456 if not video_tags:
2457 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2458
2459 def _extract_count(count_name):
2460 return str_to_int(self._search_regex(
a6c666d0 2461 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
7e72694b
S
2462 % re.escape(count_name),
2463 video_webpage, count_name, default=None))
2464
2465 like_count = _extract_count('like')
2466 dislike_count = _extract_count('dislike')
2467
dbdaaa23
S
2468 if view_count is None:
2469 view_count = str_to_int(self._search_regex(
2470 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2471 'view count', default=None))
2472
bf3c9326
S
2473 average_rating = (
2474 float_or_none(video_details.get('averageRating'))
2475 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2476
7e72694b 2477 # subtitles
321bf820 2478 video_subtitles = self.extract_subtitles(
2479 video_id, video_webpage, has_live_chat_replay)
7e72694b
S
2480 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2481
2482 video_duration = try_get(
2483 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2484 if not video_duration:
2485 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2486 if not video_duration:
2487 video_duration = parse_duration(self._html_search_meta(
2488 'duration', video_webpage, 'video duration'))
2489
b84071c0
JP
2490 # Get Subscriber Count of channel
2491 subscriber_count = parse_count(self._search_regex(
2492 r'"text":"([\d\.]+\w?) subscribers"',
2493 video_webpage,
2494 'subscriber count',
2495 default=None
2496 ))
2497
7e72694b
S
2498 # annotations
2499 video_annotations = None
2500 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2501 xsrf_token = self._search_regex(
2502 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2503 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2504 invideo_url = try_get(
2505 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2506 if xsrf_token and invideo_url:
2507 xsrf_field_name = self._search_regex(
2508 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2509 video_webpage, 'xsrf field name',
2510 group='xsrf_field_name', default='session_token')
2511 video_annotations = self._download_webpage(
2512 self._proto_relative_url(invideo_url),
2513 video_id, note='Downloading annotations',
2514 errnote='Unable to download video annotations', fatal=False,
2515 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2516
84213ea8 2517 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2518
dd27fd17 2519 # Look for the DASH manifest
203fb43f 2520 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2521 dash_mpd_fatal = True
8ff648e4 2522 for mpd_url in dash_mpds:
d8d24a92 2523 dash_formats = {}
774e208f 2524 try:
05d0d131
YCH
2525 def decrypt_sig(mobj):
2526 s = mobj.group(1)
2527 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2528 return '/signature/%s' % dec_s
2529
8ff648e4 2530 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2531
8ff648e4 2532 for df in self._extract_mpd_formats(
2533 mpd_url, video_id, fatal=dash_mpd_fatal,
2534 formats_dict=self._formats):
c63ca0ee
S
2535 if not df.get('filesize'):
2536 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2537 # Do not overwrite DASH format found in some previous DASH manifest
2538 if df['format_id'] not in dash_formats:
2539 dash_formats[df['format_id']] = df
77c6fb5b
S
2540 # Additional DASH manifests may end up in HTTP Error 403 therefore
2541 # allow them to fail without bug report message if we already have
2542 # some DASH manifest succeeded. This is temporary workaround to reduce
2543 # burst of bug reports until we figure out the reason and whether it
2544 # can be fixed at all.
2545 dash_mpd_fatal = False
774e208f
PH
2546 except (ExtractorError, KeyError) as e:
2547 self.report_warning(
2548 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2549 if dash_formats:
04b3b3df
JMF
2550 # Remove the formats we found through non-DASH, they
2551 # contain less info and it can be wrong, because we use
2552 # fixed values (for example the resolution). See
067aa17e 2553 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2554 # example.
d80265cc 2555 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2556 formats.extend(dash_formats.values())
d80044c2 2557
6271f1ca
PH
2558 # Check for malformed aspect ratio
2559 stretched_m = re.search(
2560 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2561 video_webpage)
2562 if stretched_m:
313dfc45
LL
2563 w = float(stretched_m.group('w'))
2564 h = float(stretched_m.group('h'))
5faf9fed
S
2565 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2566 # We will only process correct ratios.
313dfc45 2567 if w > 0 and h > 0:
41f24c32 2568 ratio = w / h
313dfc45
LL
2569 for f in formats:
2570 if f.get('vcodec') != 'none':
2571 f['stretched_ratio'] = ratio
6271f1ca 2572
026fbedc 2573 if not formats:
43ebf77d
S
2574 if 'reason' in video_info:
2575 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2576 regions_allowed = self._html_search_meta(
2577 'regionsAllowed', video_webpage, default=None)
2578 countries = regions_allowed.split(',') if regions_allowed else None
2579 self.raise_geo_restricted(
2580 msg=video_info['reason'][0], countries=countries)
2581 reason = video_info['reason'][0]
2582 if 'Invalid parameters' in reason:
2583 unavailable_message = extract_unavailable_message()
2584 if unavailable_message:
2585 reason = unavailable_message
2586 raise ExtractorError(
2587 'YouTube said: %s' % reason,
2588 expected=True, video_id=video_id)
2589 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2590 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2591
4bcc7bd1 2592 self._sort_formats(formats)
4ea3be0a 2593
21c340b8 2594 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2595
4ea3be0a 2596 return {
8bcc8756
JW
2597 'id': video_id,
2598 'uploader': video_uploader,
2599 'uploader_id': video_uploader_id,
fd050249 2600 'uploader_url': video_uploader_url,
dd4c4492
S
2601 'channel_id': channel_id,
2602 'channel_url': channel_url,
8bcc8756 2603 'upload_date': upload_date,
7caf9830 2604 'license': video_license,
936784b2 2605 'creator': video_creator or artist,
8bcc8756 2606 'title': video_title,
936784b2 2607 'alt_title': video_alt_title or track,
b477fc13 2608 'thumbnails': thumbnails,
8bcc8756
JW
2609 'description': video_description,
2610 'categories': video_categories,
000b6b5a 2611 'tags': video_tags,
8bcc8756 2612 'subtitles': video_subtitles,
360e1ca5 2613 'automatic_captions': automatic_captions,
8bcc8756
JW
2614 'duration': video_duration,
2615 'age_limit': 18 if age_gate else 0,
2616 'annotations': video_annotations,
9cafc3fd 2617 'chapters': chapters,
7e8c0af0 2618 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2619 'view_count': view_count,
4ea3be0a 2620 'like_count': like_count,
2621 'dislike_count': dislike_count,
bf3c9326 2622 'average_rating': average_rating,
8bcc8756 2623 'formats': formats,
2fe1ff85 2624 'is_live': is_live,
7c80519c 2625 'start_time': start_time,
297a564b 2626 'end_time': end_time,
12afdc2a
S
2627 'series': series,
2628 'season_number': season_number,
2629 'episode_number': episode_number,
936784b2
S
2630 'track': track,
2631 'artist': artist,
5caabd3c 2632 'album': album,
2633 'release_date': release_date,
2634 'release_year': release_year,
b84071c0 2635 'subscriber_count': subscriber_count,
4ea3be0a 2636 }
c5e8d7af 2637
5f6a1245 2638
8e7aad20 2639class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2640 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2641 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2642 (?:https?://)?
2643 (?:\w+\.)?
c5e8d7af 2644 (?:
c0345b82 2645 (?:
66b48727 2646 youtube(?:kids)?\.com|
c0345b82
S
2647 invidio\.us
2648 )
2649 /
feaa5ad7 2650 (?:
87dadd45 2651 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2652 \? (?:.*?[&;])*? (?:p|a|list)=
2653 | p/
2654 )|
2655 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2656 )
d67cc9fa 2657 (
66b48727 2658 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
5f6a1245 2659 # Top tracks, they can also include dots
d67cc9fa
JMF
2660 |(?:MC)[\w\.]*
2661 )
c5e8d7af
PH
2662 .*
2663 |
d0ba5587
S
2664 (%(playlist_id)s)
2665 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2666 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
351f37c0
S
2667 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2668 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
78caa52a 2669 IE_NAME = 'youtube:playlist'
81127aa5 2670 _TESTS = [{
0e30a7b9 2671 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2672 'info_dict': {
0e30a7b9 2673 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2674 'uploader': 'Sergey M.',
2675 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2676 'title': 'youtube-dl public playlist',
81127aa5 2677 },
0e30a7b9 2678 'playlist_count': 1,
9291475f 2679 }, {
0e30a7b9 2680 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2681 'info_dict': {
0e30a7b9 2682 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2683 'uploader': 'Sergey M.',
2684 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2685 'title': 'youtube-dl empty playlist',
9291475f
PH
2686 },
2687 'playlist_count': 0,
2688 }, {
2689 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2690 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2691 'info_dict': {
2692 'title': '29C3: Not my department',
acf757f4 2693 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
13a75688
S
2694 'uploader': 'Christiaan008',
2695 'uploader_id': 'ChRiStIaAn008',
9291475f 2696 },
0e30a7b9 2697 'playlist_count': 96,
9291475f
PH
2698 }, {
2699 'note': 'issue #673',
2700 'url': 'PLBB231211A4F62143',
2701 'info_dict': {
f46a8702 2702 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2703 'id': 'PLBB231211A4F62143',
13a75688
S
2704 'uploader': 'Wickydoo',
2705 'uploader_id': 'Wickydoo',
9291475f
PH
2706 },
2707 'playlist_mincount': 26,
2708 }, {
2709 'note': 'Large playlist',
2710 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2711 'info_dict': {
2712 'title': 'Uploads from Cauchemar',
acf757f4 2713 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
13a75688
S
2714 'uploader': 'Cauchemar',
2715 'uploader_id': 'Cauchemar89',
9291475f
PH
2716 },
2717 'playlist_mincount': 799,
2718 }, {
2719 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2720 'info_dict': {
2721 'title': 'YDL_safe_search',
acf757f4 2722 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2723 },
2724 'playlist_count': 2,
4201ba13 2725 'skip': 'This playlist is private',
ac7553d0
PH
2726 }, {
2727 'note': 'embedded',
2d3d2997 2728 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2729 'playlist_count': 4,
2730 'info_dict': {
2731 'title': 'JODA15',
acf757f4 2732 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
13a75688
S
2733 'uploader': 'milan',
2734 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
ac7553d0 2735 }
87dadd45
S
2736 }, {
2737 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2738 'playlist_mincount': 485,
2739 'info_dict': {
13a75688 2740 'title': '2018 Chinese New Singles (11/6 updated)',
87dadd45 2741 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
13a75688
S
2742 'uploader': 'LBK',
2743 'uploader_id': 'sdragonfang',
87dadd45 2744 }
6b08cdf6
PH
2745 }, {
2746 'note': 'Embedded SWF player',
2d3d2997 2747 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2748 'playlist_count': 4,
2749 'info_dict': {
2750 'title': 'JODA7',
acf757f4 2751 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
13a75688
S
2752 },
2753 'skip': 'This playlist does not exist',
4b7df0d3
JMF
2754 }, {
2755 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2756 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2757 'info_dict': {
acf757f4
PH
2758 'title': 'Uploads from Interstellar Movie',
2759 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688
S
2760 'uploader': 'Interstellar Movie',
2761 'uploader_id': 'InterstellarMovie1',
4b7df0d3 2762 },
481cc733 2763 'playlist_mincount': 21,
dacb3a86
S
2764 }, {
2765 # Playlist URL that does not actually serve a playlist
2766 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2767 'info_dict': {
2768 'id': 'FqZTN594JQw',
2769 'ext': 'webm',
2770 'title': "Smiley's People 01 detective, Adventure Series, Action",
2771 'uploader': 'STREEM',
2772 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2774 'upload_date': '20150526',
2775 'license': 'Standard YouTube License',
2776 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2777 'categories': ['People & Blogs'],
2778 'tags': list,
dbdaaa23 2779 'view_count': int,
dacb3a86
S
2780 'like_count': int,
2781 'dislike_count': int,
2782 },
2783 'params': {
2784 'skip_download': True,
2785 },
13a75688 2786 'skip': 'This video is not available.',
dacb3a86 2787 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2788 }, {
2789 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2790 'info_dict': {
2791 'id': 'yeWKywCrFtk',
2792 'ext': 'mp4',
2793 'title': 'Small Scale Baler and Braiding Rugs',
2794 'uploader': 'Backus-Page House Museum',
2795 'uploader_id': 'backuspagemuseum',
ec85ded8 2796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733 2797 'upload_date': '20161008',
481cc733
S
2798 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2799 'categories': ['Nonprofits & Activism'],
2800 'tags': list,
2801 'like_count': int,
2802 'dislike_count': int,
2803 },
2804 'params': {
2805 'noplaylist': True,
2806 'skip_download': True,
2807 },
2e18adec
S
2808 }, {
2809 # https://github.com/ytdl-org/youtube-dl/issues/21844
2810 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2811 'info_dict': {
2812 'title': 'Data Analysis with Dr Mike Pound',
2813 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2814 'uploader_id': 'Computerphile',
2815 'uploader': 'Computerphile',
2816 },
2817 'playlist_mincount': 11,
feaa5ad7
S
2818 }, {
2819 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2820 'only_matching': True,
a6857510
S
2821 }, {
2822 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2823 'only_matching': True,
409b9324
S
2824 }, {
2825 # music album playlist
2826 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2827 'only_matching': True,
c0345b82
S
2828 }, {
2829 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2830 'only_matching': True,
66b48727
RA
2831 }, {
2832 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2833 'only_matching': True,
81127aa5 2834 }]
c5e8d7af 2835
880e1c52
JMF
def _real_initialize(self):
    """Perform the login step by delegating to ``self._login``."""
    self._login()
2838
351f37c0
S
def extract_videos_from_page(self, page):
    """Collect video ids and titles found in a playlist page.

    Tags carrying a ``data-video-id`` attribute are parsed first; the id
    comes from that attribute and the title from ``data-title`` (HTML
    unescaped and stripped when non-empty).  The same two lists are then
    handed to ``extract_videos_from_page_impl`` once per fallback pattern
    (presumably to add further matches - see that helper).

    Returns ``zip(ids, titles)`` of the two accumulated lists.
    """
    video_ids, video_titles = [], []

    tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
    for tag_match in re.finditer(tag_re, page):
        tag_attrs = extract_attributes(tag_match.group(1))
        found_id = tag_attrs['data-video-id']
        found_title = unescapeHTML(tag_attrs.get('data-title'))
        video_ids.append(found_id)
        # Empty/missing titles are stored unchanged (no strip applied)
        video_titles.append(found_title.strip() if found_title else found_title)

    fallback_patterns = (
        # Fallback with old _VIDEO_RE
        self._VIDEO_RE,
        # Relaxed fallbacks
        r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
        r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
    )
    for pattern in fallback_patterns:
        self.extract_videos_from_page_impl(
            pattern, page, video_ids, video_titles)

    return zip(video_ids, video_titles)
2866
def _extract_mix(self, playlist_id):
    """Extract a YouTube mix by repeatedly following its watch pages.

    The mixes are generated from a single video; the id of the playlist
    is just 'RD' + video_id, so the last 11 characters of ``playlist_id``
    seed the first watch page.  Each following page is requested for the
    last video id collected so far, until a page yields no unseen ids.

    Returns the result of ``self.playlist_result`` built from the
    collected ids and the title scraped from the last downloaded page.
    """
    # Loop invariant: only the playlist id is interpolated into the pattern
    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected_ids = []
    seed_id = playlist_id[-11:]
    for page_num in itertools.count(1):
        watch_url = 'https://www.youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
        webpage = self._download_webpage(
            watch_url, playlist_id,
            'Downloading page {0} of Youtube mix'.format(page_num))
        page_ids = orderedSet(re.findall(mix_entry_re, webpage))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        unseen_ids = [
            candidate for candidate in page_ids
            if candidate not in collected_ids]
        if not unseen_ids:
            break
        collected_ids.extend(unseen_ids)
        seed_id = collected_ids[-1]

    url_results = self._ids_to_results(collected_ids)

    # Try the known title containers in order; stop at the first hit.
    # If none matches, the value of the last lookup is kept.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break
    title = clean_html(title_span)

    return self.playlist_result(url_results, playlist_id, title)
2898
def _extract_playlist(self, playlist_id):
    """Download a playlist page and build its playlist result.

    Alert messages found in the page are inspected first: a missing or
    private playlist and an "Invalid parameters" alert raise an expected
    ``ExtractorError``; the "Choose your language" alert is ignored; any
    other alert is reported as a warning.

    Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
    only when no playlist title was found and ``self._entries`` yields
    nothing for the page.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    for raw_alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        alert = raw_alert.strip()

        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            hint = ', use --username or --netrc to access it' if 'private' in reason else ''
            raise ExtractorError(
                'This playlist %s%s.' % (reason, hint), expected=True)

        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)

        if not re.match(r'[^<]*Choose your language[^<]*', alert):
            self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of both uploader patterns: the first link of the
    # playlist header details list
    uploader_link_re = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._html_search_regex(
        uploader_link_re + r'["\']/(?:user|channel)/[^>]+>([^<]+)',
        page, 'uploader', default=None)
    uploader_mobj = re.search(
        uploader_link_re + r'(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1',
        page)
    uploader_id = uploader_url = None
    if uploader_mobj:
        uploader_id = uploader_mobj.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, uploader_mobj.group('path'))

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        # so probe for at least one entry.
        try:
            next(self._entries(page, playlist_id))
        except StopIteration:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist.update({
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
    })

    return has_videos, playlist
c5e8d7af 2960
def _check_download_just_video(self, url, playlist_id):
    """Check whether ``url`` also designates a single video.

    The video id is taken from the ``v`` query parameter, falling back
    to a youtu.be / embed style path.

    Returns a ``(video_id, video_result)`` tuple:
      * ``(None, None)`` when the URL carries no video id;
      * ``(video_id, url_result)`` when the ``noplaylist`` param is set;
      * ``(video_id, None)`` otherwise.
    """
    # Check if it's a video-specific URL
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)

    if not video_id:
        return None, None

    if self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
    return video_id, None
448830ce 2975
ebf1b291
S
def _real_extract(self, url):
    """Dispatch a playlist URL to the matching extraction path.

    In order: a single video result when
    ``_check_download_just_video`` supplies one, the mix extractor for
    ids starting with 'RD', 'UL' or 'PU', otherwise the regular
    playlist.  When the playlist turns out to have no videos and the URL
    carried a video id, a plain video result is returned instead.

    Raises ``ExtractorError`` when ``url`` does not match ``_VALID_URL``.
    """
    # Extract playlist id
    url_mobj = re.match(self._VALID_URL, url)
    if url_mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    full_url_id, bare_id = url_mobj.group(1, 2)
    playlist_id = full_url_id or bare_id

    video_id, video = self._check_download_just_video(url, playlist_id)
    if video:
        return video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if not has_videos and video_id:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist
448830ce 3000
c5e8d7af 3001
648e6a1f 3002class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 3003 IE_DESC = 'YouTube.com channels'
66b48727 3004 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
eb0f3e7e 3005 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
648e6a1f 3006 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
78caa52a 3007 IE_NAME = 'youtube:channel'
cdc628a4
PH
3008 _TESTS = [{
3009 'note': 'paginated channel',
3010 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
3011 'playlist_mincount': 91,
acf757f4 3012 'info_dict': {
9170ca5b
JMF
3013 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
3014 'title': 'Uploads from lex will',
13a75688
S
3015 'uploader': 'lex will',
3016 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
acf757f4 3017 }
5c43afd4
JMF
3018 }, {
3019 'note': 'Age restricted channel',
3020 # from https://www.youtube.com/user/DeusExOfficial
3021 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
3022 'playlist_mincount': 64,
3023 'info_dict': {
3024 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
3025 'title': 'Uploads from Deus Ex',
13a75688
S
3026 'uploader': 'Deus Ex',
3027 'uploader_id': 'DeusExOfficial',
5c43afd4 3028 },
cd5a74a2
S
3029 }, {
3030 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
3031 'only_matching': True,
66b48727
RA
3032 }, {
3033 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
3034 'only_matching': True,
cdc628a4 3035 }]
c5e8d7af 3036
e462474e
S
@classmethod
def suitable(cls, url):
    """Return False for URLs that ``YoutubePlaylistsIE`` or
    ``YoutubeLiveIE`` accept; otherwise defer to the parent class check."""
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
e462474e 3041
9558dcec
S
def _build_template_url(self, url, channel_id):
    """Return ``_TEMPLATE_URL`` filled in with ``channel_id``.

    ``url`` is accepted but not used by this implementation.
    """
    template = self._TEMPLATE_URL
    return template % channel_id
3044
def _real_extract(self, url):
    """Extract the videos of a channel.

    First tries to resolve the channel to its uploads playlist (``UU...``)
    and delegates to the playlist extractor. If that is not possible, the
    channel's video pages are scraped directly.
    """
    channel_id = self._match_id(url)
    url = self._build_template_url(url, channel_id)

    # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
    # Workaround by extracting as a playlist if managed to obtain channel playlist URL
    # otherwise fallback on channel by page extraction
    listing_page = self._download_webpage(
        url + '?view=57', channel_id,
        'Downloading channel page', fatal=False)

    uploads_owner_id = False
    if listing_page is not False:
        uploads_owner_id = self._html_search_meta(
            'channelId', listing_page, 'channel id', default=None)
        if not uploads_owner_id:
            # Fall back to the app deep-link meta tags, which embed the id
            app_url = self._html_search_meta(
                ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                listing_page, 'channel url', default=None)
            if app_url:
                uploads_owner_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    app_url, 'channel id', default=None)

    if uploads_owner_id and uploads_owner_id.startswith('UC'):
        # Swap the "UC" prefix for "UU" to address the uploads playlist
        playlist_id = 'UU' + uploads_owner_id[2:]
        playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
        return self.url_result(playlist_url, 'YoutubePlaylist')

    channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
    autogenerated_marker = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page)
    autogenerated = autogenerated_marker is not None

    if autogenerated:
        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = []
        for video_id, video_title in self.extract_videos_from_page(channel_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)

    # Probe for a first entry; when there is none, report any alert shown
    # on the page instead of silently returning an empty playlist.
    try:
        next(self._entries(channel_page, channel_id))
    except StopIteration:
        alert_message = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
            channel_page, 'alert', default=None, group='alert')
        if alert_message:
            raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

    all_entries = self._entries(channel_page, channel_id)
    return self.playlist_result(all_entries, channel_id)
c5e8d7af
PH
3101
3102
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` URLs and ``ytuser:`` keywords."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other Youtube extractor in this module does."""
        # The regex of this extractor is too permissive and would match URLs
        # that belong to other youtube extractors, so give every other
        # module-level ``Youtube*IE`` class the first say.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the ``user``/``c`` path kind of ``url``."""
        match = re.match(self._VALID_URL, url)
        # Bare names and ``ytuser:`` keywords carry no path kind; use "user"
        path_kind = match.group('user') or 'user'
        user_name = match.group('id')
        return self._TEMPLATE_URL % (path_kind, user_name)
3160
b05654f0 3161
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to the video it shows, or else to its base URL."""
        match = re.match(self._VALID_URL, url)
        channel_id = match.group('id')
        base_url = match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        # Only trust the page when it is a video page with an 11-char video id
        points_to_video = (
            page_type.startswith('video') and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if points_to_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3212
3213
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user or channel.

    Only declares the URL pattern and metadata; no methods are overridden here.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
0c148415
S
3246
3247
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; only overrides the video-link regex."""

    # Matches a /watch link with an 11-character id and, optionally, the
    # ``title`` attribute that follows it in the same tag.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3250
3251
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for ``ytsearch`` keyword queries."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until at least ``n``
        videos are collected, a page yields no videos, or no "Next" link is
        found. At most ``n`` entries are returned, as a playlist result
        whose id is ``query``.

        Raises ExtractorError when the site reports that there are no
        results for the query.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected. Comparing with
            # ``>=`` (rather than ``>``) avoids downloading one more page
            # whose results would all be discarded below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past the requested amount
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3300
c9ae7b95 3301
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that adds a sort-by-upload-date query argument."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3307
c9ae7b95 3308
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=...``) URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Collect every dict carrying a ``videoId`` key from a parsed JSON tree.

        The tree is walked depth-first through lists and dicts. A dict that
        has a ``videoId`` key is collected as-is and not searched further.
        Returns the collected dicts in encounter order.
        """
        found = []

        def _walk(node):
            if type(node) is dict:
                if "videoId" in node:
                    found.append(node)
                    return
                for child in node.values():
                    _walk(child)
            elif type(node) is list:
                for item in node:
                    _walk(item)
            # Anything else (None, strings, numbers) holds no videos

        _walk(extracted)
        return found

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Append the videos found in ``page`` to the two parallel lists.

        ``ids_in_page`` and ``titles_in_page`` are modified in place. An id
        that is already listed is not added again; its title is filled in
        when the listed one is empty.
        """
        initial_data = self._search_regex(self._SEARCH_DATA, page, 'ytInitialData')
        search_response = self._parse_json(initial_data, None)

        for renderer in self._find_videos_in_json(search_response):
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = (
                try_get(renderer, lambda x: x['title']['runs'][0]['text'])
                or try_get(renderer, lambda x: x['title']['simpleText']))

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            if video_id in ids_in_page:
                position = ids_in_page.index(video_id)
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return ``(video_id, title)`` pairs for the videos found in ``page``."""
        video_ids, video_titles = [], []
        self.extract_videos_from_page_impl(page, video_ids, video_titles)
        return zip(video_ids, video_titles)

    def _real_extract(self, url):
        """Download the results page of ``url`` and return its videos as a playlist titled with the query."""
        match = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(match.group('query'))
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3382
3383
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<name>`` URLs, handled through the show's playlists page."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the inherited extraction using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3401
3402
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors

    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _YTCFG_DATA = r"ytcfg.set\(({.*?})\)"

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _find_videos_in_json(self, extracted):
        """Collect video dicts and the continuation from a parsed JSON tree.

        The tree is walked depth-first through lists and dicts. A dict with
        a ``videoId`` key is collected as-is; a dict with a
        ``nextContinuationData`` key supplies the continuation. Neither kind
        is searched any further.

        Returns a ``(videos, continuation)`` tuple where ``continuation`` is
        the last ``nextContinuationData`` value encountered, or None.
        """
        videos = []
        # Mutable holder written by the nested function (Python 2 has no
        # ``nonlocal``).
        found = {}

        def _real_find(obj):
            if isinstance(obj, dict):
                if "videoId" in obj:
                    videos.append(obj)
                elif "nextContinuationData" in obj:
                    found["continuation"] = obj["nextContinuationData"]
                else:
                    for child in obj.values():
                        _real_find(child)
            elif isinstance(obj, list):
                for elem in obj:
                    _real_find(elem)
            # Anything else (None, strings, numbers) holds nothing of interest

        _real_find(extracted)

        return videos, found.get("continuation")

    def _entries(self, page):
        """Yield a url_result for every video of the feed, following continuations.

        Videos already yielded from an earlier page are skipped. Iteration
        stops when a page brings no new video, when there is no continuation,
        or when the ``ytcfg`` data needed for follow-up requests is missing.
        """
        def drop_none(mapping):
            # None-valued request headers raise in Python 3's http.client and
            # None-valued query arguments would be sent as the text "None",
            # so leave out whatever could not be determined.
            return dict((key, value) for key, value in mapping.items() if value is not None)

        # Ids of every video yielded so far; a set keeps the duplicate check
        # constant-time instead of rescanning all earlier videos.
        seen_ids = set()

        yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_info = []
            for video in video_info:
                # Only string ids are usable as URLs (and hashable)
                video_id = try_get(video, lambda x: x['videoId'], compat_str)
                if video_id and video_id not in seen_ids:
                    new_info.append((video_id, video))

            if not new_info:
                break

            # Updated only after the page is filtered, so duplicates are
            # detected against earlier pages exactly as before.
            seen_ids.update(video_id for video_id, _ in new_info)

            for video_id, video in new_info:
                video_title = (
                    try_get(video, lambda x: x['title']['runs'][0]['text'])
                    or try_get(video, lambda x: x['title']['simpleText']))
                yield self.url_result(video_id, YoutubeIE.ie_key(), video_title=video_title)

            if not continuation or not yt_conf:
                break

            continuation_token = try_get(continuation, lambda x: x["continuation"])
            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query=drop_none({
                    "ctoken": continuation_token,
                    "continuation": continuation_token,
                    "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
                }),
                headers=drop_none({
                    "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
                    "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
                    "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
                    "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
                    "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
                    "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
                    "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
                }))

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3509
3510
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id ``WL``)."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected by the check, otherwise the whole list."""
        _unused, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _unused, whole_list = self._extract_playlist('WL')
            return whole_list
        return single_video
f459d170 3530
5f6a1245 3531
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list of the logged-in account."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id on the my_favorites page and hand it to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
3542
3543
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/recommended``."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1ed5b5c9 3549
1ed5b5c9 3550
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/subscriptions``."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1ed5b5c9 3556
1ed5b5c9 3557
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/history``."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
1ed5b5c9
JMF
3563
3564
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that lack a video id and explains the likely cause.

    Never extracts anything: ``_real_extract`` always raises.
    """

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an ExtractorError hinting that the URL was not quoted in the shell."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3612
3613
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id has only 1 to 10 characters.

    Never extracts anything: ``_real_extract`` always raises.
    """

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an ExtractorError reporting the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)