]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
Update to ytdl-2021.02.10
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
d77ab8e2 8import random
c5e8d7af 9import re
8a784c74 10import time
e0df6211 11import traceback
c5e8d7af 12
b05654f0 13from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 14from ..compat import (
edf3e38e 15 compat_chr,
29f7c58a 16 compat_HTTPError,
8d81f3e3 17 compat_kwargs,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c5e8d7af 27 clean_html,
c5e8d7af 28 ExtractorError,
b60419c5 29 format_field,
2d30521a 30 float_or_none,
dd27fd17 31 int_or_none,
94278f72 32 mimetype2ext,
6310acf5 33 parse_codecs,
7c80519c 34 parse_duration,
cc2db878 35 # qualities, # TODO: Enable this after fixing formatSort
3995d37d 36 remove_start,
cf7e015f 37 smuggle_url,
dbdaaa23 38 str_or_none,
c93d53f5 39 str_to_int,
556dbe7f 40 try_get,
c5e8d7af
PH
41 unescapeHTML,
42 unified_strdate,
cf7e015f 43 unsmuggle_url,
8bdd16b4 44 update_url_query,
21c340b8 45 url_or_none,
6e6bc8da 46 urlencode_postdata,
8bdd16b4 47 urljoin,
c5e8d7af
PH
48)
49
5f6a1245 50
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Wrap every video id in ``ids`` into a 'Youtube' url_result entry."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus ``f_req`` (JSON-encoded) to ``url``.

            Returns the parsed JSON response; the download is non-fatal.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                # (presumably an anti-hijacking prefix - verify against a live response)
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same continuation URL is embedded in both the lookup and the
        # challenge request, so it is spelled out only once.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the 'Unable to login' prefix is kept for every
            # message; '%' binds tighter than the conditional expression.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with a private copy of ``query``.

        A missing or explicitly ``None`` query is replaced by an empty dict.
        """
        kwargs['query'] = (kwargs.get('query') or {}).copy()
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Try to log in once a downloader is attached; the result is not used."""
        if self._downloader is None:
            return
        self._login()

    # Base payload sent with every request made through _call_api
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id, fatal=True):
        """POST ``query`` (merged over _DEFAULT_API_DATA) to the youtubei/v1 endpoint ``ep``.

        Returns whatever ``_download_json`` returns for the response.
        """
        # Shallow copy: top-level keys of ``query`` override the defaults
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'), fatal=fatal,
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in ``webpage`` and parse it as JSON."""
        # Prefer the pattern anchored by a trailing boundary; fall back to the
        # bare pattern when the boundary is not found.
        patterns = (
            r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
            self._YT_INITIAL_DATA_RE)
        return self._parse_json(
            self._search_regex(patterns, webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the argument of the first ``ytcfg.set({...})`` call in ``webpage``.

        '{}' is parsed when no such call is found; parsing is non-fatal.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result for YoutubeIE from a video renderer dict."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first so the leading digit group is contiguous
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
339
0c148415 340
360e1ca5 341class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 342 IE_DESC = 'YouTube.com'
cb7dfeea 343 _VALID_URL = r"""(?x)^
c5e8d7af 344 (
edb53e2d 345 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 346 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 347 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 348 (?:www\.)?pwnyoutube\.com/|
8b561bfc 349 (?:www\.)?hooktube\.com/|
f7000f3a 350 (?:www\.)?yourepeat\.com/|
e69ae5b9 351 tube\.majestyc\.net/|
ba036333 352 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 353 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 354 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 355 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 356 (?:www\.)?invidious\.kabi\.tk/|
ba036333 357 (?:www\.)?invidious\.13ad\.de/|
791d2e81 358 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 359 (?:www\.)?invidious\.zapashcanon\.fr/|
360 (?:www\.)?invidious\.kavin\.rocks/|
361 (?:www\.)?invidious\.tube/|
362 (?:www\.)?invidiou\.site/|
363 (?:www\.)?invidious\.site/|
364 (?:www\.)?invidious\.xyz/|
494d664e 365 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 366 (?:www\.)?invidious\.drycat\.fr/|
ba036333 367 (?:www\.)?tube\.poal\.co/|
29f7c58a 368 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 369 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 370 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 371 (?:www\.)?yewtu\.be/|
494d664e 372 (?:www\.)?yt\.elukerio\.org/|
894b3826 373 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 374 (?:www\.)?invidious\.ggc-project\.de/|
375 (?:www\.)?yt\.maisputain\.ovh/|
376 (?:www\.)?invidious\.13ad\.de/|
377 (?:www\.)?invidious\.toot\.koeln/|
378 (?:www\.)?invidious\.fdn\.fr/|
379 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 380 (?:www\.)?kgg2m7yk5aybusll\.onion/|
381 (?:www\.)?qklhadlycap4cnod\.onion/|
382 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
383 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
384 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
385 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 386 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 387 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 388 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
389 (?:.*?\#/)? # handle anchor (#/) redirect urls
390 (?: # the various things that can precede the ID:
ac7553d0 391 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 392 |(?: # or the v= param in all its forms
f7000f3a 393 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 394 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 395 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
396 v=
397 )
f4b05232 398 ))
cbaed4bb
S
399 |(?:
400 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
401 vid\.plus| # or vid.plus/xxxx
402 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 403 )/
edb53e2d 404 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 405 )
c5e8d7af 406 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 407 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
408 (?!.*?\blist=
409 (?:
410 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
411 WL # WL are handled by the watch later IE
412 )
413 )
c5e8d7af 414 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 415 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
e40c758c 416 _PLAYER_INFO_RE = (
cc2db878 417 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
418 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 419 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 420 )
2c62dc26 421 _formats = {
c2d3cb4c 422 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
423 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
424 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
425 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
426 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
427 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
429 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 430 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 431 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
432 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
433 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
434 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
435 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
436 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 437 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 438 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 440
441
442 # 3D videos
c2d3cb4c 443 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
444 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
445 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
446 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 447 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
448 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
449 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 450
96fb5605 451 # Apple HTTP Live Streaming
11f12195 452 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 453 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
455 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
456 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
457 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 458 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
459 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
460
461 # DASH mp4 video
d23028a8
S
462 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
463 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 467 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
468 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
469 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
470 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
472 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
473 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 474
f6f1fc92 475 # Dash mp4 audio
d23028a8
S
476 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
477 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
478 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
479 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
480 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
481 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
482 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
483
484 # Dash webm
d23028a8
S
485 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
487 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
488 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
489 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
490 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
491 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
492 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
496 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
498 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
499 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 500 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
501 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
503 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
504 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
505 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
507
508 # Dash webm audio
d23028a8
S
509 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
510 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 511
0857baad 512 # Dash webm audio with opus inside
d23028a8
S
513 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
514 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
515 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 516
ce6b9a2d
PH
517 # RTMP (unnamed)
518 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
519
520 # av01 video only formats sometimes served with "unknown" codecs
521 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
522 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
523 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
524 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 525 }
29f7c58a 526 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 527
fd5c4aab
S
528 _GEO_BYPASS = False
529
78caa52a 530 IE_NAME = 'youtube'
2eb88d95
PH
531 _TESTS = [
532 {
2d3d2997 533 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
534 'info_dict': {
535 'id': 'BaW_jenozKc',
536 'ext': 'mp4',
3867038a 537 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
538 'uploader': 'Philipp Hagemeister',
539 'uploader_id': 'phihag',
ec85ded8 540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
541 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
542 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 543 'upload_date': '20121002',
3867038a 544 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 545 'categories': ['Science & Technology'],
3867038a 546 'tags': ['youtube-dl'],
556dbe7f 547 'duration': 10,
dbdaaa23 548 'view_count': int,
3e7c1224
PH
549 'like_count': int,
550 'dislike_count': int,
7c80519c 551 'start_time': 1,
297a564b 552 'end_time': 9,
2eb88d95 553 }
0e853ca4 554 },
fccd3771 555 {
4bc3a23e
PH
556 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
557 'note': 'Embed-only video (#1746)',
558 'info_dict': {
559 'id': 'yZIXLfi8CZQ',
560 'ext': 'mp4',
561 'upload_date': '20120608',
562 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
563 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
564 'uploader': 'SET India',
94bfcd23 565 'uploader_id': 'setindia',
ec85ded8 566 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 567 'age_limit': 18,
545cc85d 568 },
569 'skip': 'Private video',
fccd3771 570 },
11b56058 571 {
8bdd16b4 572 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
573 'note': 'Use the first video ID in the URL',
574 'info_dict': {
575 'id': 'BaW_jenozKc',
576 'ext': 'mp4',
3867038a 577 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
578 'uploader': 'Philipp Hagemeister',
579 'uploader_id': 'phihag',
ec85ded8 580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 581 'upload_date': '20121002',
3867038a 582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 583 'categories': ['Science & Technology'],
3867038a 584 'tags': ['youtube-dl'],
556dbe7f 585 'duration': 10,
dbdaaa23 586 'view_count': int,
11b56058
PM
587 'like_count': int,
588 'dislike_count': int,
34a7de29
S
589 },
590 'params': {
591 'skip_download': True,
592 },
11b56058 593 },
dd27fd17 594 {
2d3d2997 595 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
596 'note': '256k DASH audio (format 141) via DASH manifest',
597 'info_dict': {
598 'id': 'a9LDPn-MO4I',
599 'ext': 'm4a',
600 'upload_date': '20121002',
601 'uploader_id': '8KVIDEO',
ec85ded8 602 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
603 'description': '',
604 'uploader': '8KVIDEO',
605 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 606 },
4bc3a23e
PH
607 'params': {
608 'youtube_include_dash_manifest': True,
609 'format': '141',
4919603f 610 },
de3c7fe0 611 'skip': 'format 141 not served anymore',
dd27fd17 612 },
8bdd16b4 613 # DASH manifest with encrypted signature
614 {
615 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
616 'info_dict': {
617 'id': 'IB3lcPjvWLA',
618 'ext': 'm4a',
619 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
620 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
621 'duration': 244,
622 'uploader': 'AfrojackVEVO',
623 'uploader_id': 'AfrojackVEVO',
624 'upload_date': '20131011',
cc2db878 625 'abr': 129.495,
8bdd16b4 626 },
627 'params': {
628 'youtube_include_dash_manifest': True,
629 'format': '141/bestaudio[ext=m4a]',
630 },
631 },
aa79ac0c
PH
632 # Controversy video
633 {
634 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
635 'info_dict': {
636 'id': 'T4XJQO3qol8',
637 'ext': 'mp4',
556dbe7f 638 'duration': 219,
aa79ac0c 639 'upload_date': '20100909',
4fe54c12 640 'uploader': 'Amazing Atheist',
aa79ac0c 641 'uploader_id': 'TheAmazingAtheist',
ec85ded8 642 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 643 'title': 'Burning Everyone\'s Koran',
545cc85d 644 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 645 }
c522adb1 646 },
dd2d55f1 647 # Normal age-gate video (embed allowed)
c522adb1 648 {
2d3d2997 649 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
650 'info_dict': {
651 'id': 'HtVdAasjOgU',
652 'ext': 'mp4',
653 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 654 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 655 'duration': 142,
c522adb1
JMF
656 'uploader': 'The Witcher',
657 'uploader_id': 'WitcherGame',
ec85ded8 658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 659 'upload_date': '20140605',
34952f09 660 'age_limit': 18,
c522adb1
JMF
661 },
662 },
8bdd16b4 663 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
664 # YouTube Red ad is not captured for creator
665 {
666 'url': '__2ABJjxzNo',
667 'info_dict': {
668 'id': '__2ABJjxzNo',
669 'ext': 'mp4',
670 'duration': 266,
671 'upload_date': '20100430',
672 'uploader_id': 'deadmau5',
673 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 674 'creator': 'deadmau5',
675 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 676 'uploader': 'deadmau5',
677 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 678 'alt_title': 'Some Chords',
8bdd16b4 679 },
680 'expected_warnings': [
681 'DASH manifest missing',
682 ]
683 },
067aa17e 684 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
685 {
686 'url': 'lqQg6PlCWgI',
687 'info_dict': {
688 'id': 'lqQg6PlCWgI',
689 'ext': 'mp4',
556dbe7f 690 'duration': 6085,
90227264 691 'upload_date': '20150827',
cbe2bd91 692 'uploader_id': 'olympic',
ec85ded8 693 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 694 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 695 'uploader': 'Olympic',
cbe2bd91
PH
696 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
697 },
698 'params': {
699 'skip_download': 'requires avconv',
e52a40ab 700 }
cbe2bd91 701 },
6271f1ca
PH
702 # Non-square pixels
703 {
704 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
705 'info_dict': {
706 'id': '_b-2C3KPAM0',
707 'ext': 'mp4',
708 'stretched_ratio': 16 / 9.,
556dbe7f 709 'duration': 85,
6271f1ca
PH
710 'upload_date': '20110310',
711 'uploader_id': 'AllenMeow',
ec85ded8 712 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 713 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 714 'uploader': '孫ᄋᄅ',
6271f1ca
PH
715 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
716 },
06b491eb
S
717 },
718 # url_encoded_fmt_stream_map is empty string
719 {
720 'url': 'qEJwOuvDf7I',
721 'info_dict': {
722 'id': 'qEJwOuvDf7I',
f57b7835 723 'ext': 'webm',
06b491eb
S
724 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
725 'description': '',
726 'upload_date': '20150404',
727 'uploader_id': 'spbelect',
728 'uploader': 'Наблюдатели Петербурга',
729 },
730 'params': {
731 'skip_download': 'requires avconv',
e323cf3f
S
732 },
733 'skip': 'This live event has ended.',
06b491eb 734 },
067aa17e 735 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
736 {
737 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
738 'info_dict': {
739 'id': 'FIl7x6_3R5Y',
eb6793ba 740 'ext': 'webm',
da77d856
S
741 'title': 'md5:7b81415841e02ecd4313668cde88737a',
742 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 743 'duration': 220,
da77d856
S
744 'upload_date': '20150625',
745 'uploader_id': 'dorappi2000',
ec85ded8 746 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 747 'uploader': 'dorappi2000',
eb6793ba 748 'formats': 'mincount:31',
da77d856 749 },
eb6793ba 750 'skip': 'not actual anymore',
2ee8f5d8 751 },
8a1a26ce
YCH
752 # DASH manifest with segment_list
753 {
754 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
755 'md5': '8ce563a1d667b599d21064e982ab9e31',
756 'info_dict': {
757 'id': 'CsmdDsKjzN8',
758 'ext': 'mp4',
17ee98e1 759 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
760 'uploader': 'Airtek',
761 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
762 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
763 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
764 },
765 'params': {
766 'youtube_include_dash_manifest': True,
767 'format': '135', # bestvideo
be49068d
S
768 },
769 'skip': 'This live event has ended.',
2ee8f5d8 770 },
cf7e015f
S
771 {
772 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 773 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 774 'info_dict': {
545cc85d 775 'id': 'jvGDaLqkpTg',
776 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
777 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
778 },
779 'playlist': [{
780 'info_dict': {
545cc85d 781 'id': 'jvGDaLqkpTg',
cf7e015f 782 'ext': 'mp4',
545cc85d 783 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
784 'description': 'md5:e03b909557865076822aa169218d6a5d',
785 'duration': 10643,
786 'upload_date': '20161111',
787 'uploader': 'Team PGP',
788 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
790 },
791 }, {
792 'info_dict': {
545cc85d 793 'id': '3AKt1R1aDnw',
cf7e015f 794 'ext': 'mp4',
545cc85d 795 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
796 'description': 'md5:e03b909557865076822aa169218d6a5d',
797 'duration': 10991,
798 'upload_date': '20161111',
799 'uploader': 'Team PGP',
800 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
801 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
802 },
803 }, {
804 'info_dict': {
545cc85d 805 'id': 'RtAMM00gpVc',
cf7e015f 806 'ext': 'mp4',
545cc85d 807 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
808 'description': 'md5:e03b909557865076822aa169218d6a5d',
809 'duration': 10995,
810 'upload_date': '20161111',
811 'uploader': 'Team PGP',
812 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
545cc85d 817 'id': '6N2fdlP3C5U',
cf7e015f 818 'ext': 'mp4',
545cc85d 819 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
820 'description': 'md5:e03b909557865076822aa169218d6a5d',
821 'duration': 10990,
822 'upload_date': '20161111',
823 'uploader': 'Team PGP',
824 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
826 },
827 }],
828 'params': {
829 'skip_download': True,
830 },
cbaed4bb 831 },
f9f49d87 832 {
067aa17e 833 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
834 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
835 'info_dict': {
836 'id': 'gVfLd0zydlo',
837 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
838 },
839 'playlist_count': 2,
be49068d 840 'skip': 'Not multifeed anymore',
f9f49d87 841 },
cbaed4bb 842 {
2d3d2997 843 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 844 'only_matching': True,
0e49d9a6 845 },
6d4fc66b 846 {
2d3d2997 847 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
848 'only_matching': True,
849 },
0e49d9a6 850 {
067aa17e 851 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 852 # Also tests cut-off URL expansion in video description (see
067aa17e
S
853 # https://github.com/ytdl-org/youtube-dl/issues/1892,
854 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
855 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
856 'info_dict': {
857 'id': 'lsguqyKfVQg',
858 'ext': 'mp4',
859 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 860 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 861 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 862 'duration': 133,
0e49d9a6
LL
863 'upload_date': '20151119',
864 'uploader_id': 'IronSoulElf',
ec85ded8 865 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 866 'uploader': 'IronSoulElf',
eb6793ba
S
867 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
868 'track': 'Dark Walk - Position Music',
869 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 870 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
871 },
872 'params': {
873 'skip_download': True,
874 },
875 },
61f92af1 876 {
067aa17e 877 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
878 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
879 'only_matching': True,
880 },
313dfc45
LL
881 {
882 # Video with yt:stretch=17:0
883 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
884 'info_dict': {
885 'id': 'Q39EVAstoRM',
886 'ext': 'mp4',
887 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
888 'description': 'md5:ee18a25c350637c8faff806845bddee9',
889 'upload_date': '20151107',
890 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
891 'uploader': 'CH GAMER DROID',
892 },
893 'params': {
894 'skip_download': True,
895 },
be49068d 896 'skip': 'This video does not exist.',
313dfc45 897 },
7caf9830
S
898 {
899 # Video licensed under Creative Commons
900 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
901 'info_dict': {
902 'id': 'M4gD1WSo5mA',
903 'ext': 'mp4',
904 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
905 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 906 'duration': 721,
7caf9830
S
907 'upload_date': '20150127',
908 'uploader_id': 'BerkmanCenter',
ec85ded8 909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 910 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
911 'license': 'Creative Commons Attribution license (reuse allowed)',
912 },
913 'params': {
914 'skip_download': True,
915 },
916 },
fd050249
S
917 {
918 # Channel-like uploader_url
919 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
920 'info_dict': {
921 'id': 'eQcmzGIKrzg',
922 'ext': 'mp4',
923 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 924 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 925 'duration': 4060,
fd050249 926 'upload_date': '20151119',
eb6793ba 927 'uploader': 'Bernie Sanders',
fd050249 928 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
930 'license': 'Creative Commons Attribution license (reuse allowed)',
931 },
932 'params': {
933 'skip_download': True,
934 },
935 },
040ac686
S
936 {
937 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
938 'only_matching': True,
7f29cf54
S
939 },
940 {
067aa17e 941 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
942 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
943 'only_matching': True,
6496ccb4
S
944 },
945 {
946 # Rental video preview
947 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
948 'info_dict': {
949 'id': 'uGpuVWrhIzE',
950 'ext': 'mp4',
951 'title': 'Piku - Trailer',
952 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
953 'upload_date': '20150811',
954 'uploader': 'FlixMatrix',
955 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 956 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
957 'license': 'Standard YouTube License',
958 },
959 'params': {
960 'skip_download': True,
961 },
eb6793ba 962 'skip': 'This video is not available.',
022a5d66 963 },
12afdc2a
S
964 {
965 # YouTube Red video with episode data
966 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
967 'info_dict': {
968 'id': 'iqKdEhx-dD4',
969 'ext': 'mp4',
970 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 971 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 972 'duration': 2085,
12afdc2a
S
973 'upload_date': '20170118',
974 'uploader': 'Vsauce',
975 'uploader_id': 'Vsauce',
976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
977 'series': 'Mind Field',
978 'season_number': 1,
979 'episode_number': 1,
980 },
981 'params': {
982 'skip_download': True,
983 },
984 'expected_warnings': [
985 'Skipping DASH manifest',
986 ],
987 },
c7121fa7
S
988 {
989 # The following content has been identified by the YouTube community
990 # as inappropriate or offensive to some audiences.
991 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
992 'info_dict': {
993 'id': '6SJNVb0GnPI',
994 'ext': 'mp4',
995 'title': 'Race Differences in Intelligence',
996 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
997 'duration': 965,
998 'upload_date': '20140124',
999 'uploader': 'New Century Foundation',
1000 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1001 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1002 },
1003 'params': {
1004 'skip_download': True,
1005 },
545cc85d 1006 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1007 },
022a5d66
S
1008 {
1009 # itag 212
1010 'url': '1t24XAntNCY',
1011 'only_matching': True,
fd5c4aab
S
1012 },
1013 {
1014 # geo restricted to JP
1015 'url': 'sJL6WA-aGkQ',
1016 'only_matching': True,
1017 },
cd5a74a2
S
1018 {
1019 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1020 'only_matching': True,
1021 },
825cd268
RA
1022 {
1023 # DRM protected
1024 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1025 'only_matching': True,
4fe54c12
S
1026 },
1027 {
1028 # Video with unsupported adaptive stream type formats
1029 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1030 'info_dict': {
1031 'id': 'Z4Vy8R84T1U',
1032 'ext': 'mp4',
1033 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1034 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1035 'duration': 433,
1036 'upload_date': '20130923',
1037 'uploader': 'Amelia Putri Harwita',
1038 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1040 'formats': 'maxcount:10',
1041 },
1042 'params': {
1043 'skip_download': True,
1044 'youtube_include_dash_manifest': False,
1045 },
5429d6a9 1046 'skip': 'not actual anymore',
5caabd3c 1047 },
1048 {
822b9d9c 1049 # Youtube Music Auto-generated description
5caabd3c 1050 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1051 'info_dict': {
1052 'id': 'MgNrAu2pzNs',
1053 'ext': 'mp4',
1054 'title': 'Voyeur Girl',
1055 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1056 'upload_date': '20190312',
5429d6a9
S
1057 'uploader': 'Stephen - Topic',
1058 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1059 'artist': 'Stephen',
1060 'track': 'Voyeur Girl',
1061 'album': 'it\'s too much love to know my dear',
1062 'release_date': '20190313',
1063 'release_year': 2019,
1064 },
1065 'params': {
1066 'skip_download': True,
1067 },
1068 },
66b48727
RA
1069 {
1070 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1071 'only_matching': True,
1072 },
011e75e6
S
1073 {
1074 # invalid -> valid video id redirection
1075 'url': 'DJztXj2GPfl',
1076 'info_dict': {
1077 'id': 'DJztXj2GPfk',
1078 'ext': 'mp4',
1079 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1080 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1081 'upload_date': '20090125',
1082 'uploader': 'Prochorowka',
1083 'uploader_id': 'Prochorowka',
1084 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1085 'artist': 'Panjabi MC',
1086 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1087 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
545cc85d 1092 'skip': 'Video unavailable',
ea74e00b
DP
1093 },
1094 {
1095 # empty description results in an empty string
1096 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1097 'info_dict': {
1098 'id': 'x41yOUIvK2k',
1099 'ext': 'mp4',
1100 'title': 'IMG 3456',
1101 'description': '',
1102 'upload_date': '20170613',
1103 'uploader_id': 'ElevageOrVert',
1104 'uploader': 'ElevageOrVert',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
a0566bbf 1110 {
29f7c58a 1111 # with '};' inside yt initial data (see [1])
1112 # see [2] for an example with '};' inside ytInitialPlayerResponse
1113 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1114 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1115 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1116 'info_dict': {
1117 'id': 'CHqg6qOn4no',
1118 'ext': 'mp4',
1119 'title': 'Part 77 Sort a list of simple types in c#',
1120 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1121 'upload_date': '20130831',
1122 'uploader_id': 'kudvenkat',
1123 'uploader': 'kudvenkat',
1124 },
1125 'params': {
1126 'skip_download': True,
1127 },
1128 },
29f7c58a 1129 {
1130 # another example of '};' in ytInitialData
1131 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1132 'only_matching': True,
1133 },
1134 {
1135 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1136 'only_matching': True,
1137 },
545cc85d 1138 {
cc2db878 1139 # https://github.com/ytdl-org/youtube-dl/pull/28094
1140 'url': 'OtqTfy26tG0',
1141 'info_dict': {
1142 'id': 'OtqTfy26tG0',
1143 'ext': 'mp4',
1144 'title': 'Burn Out',
1145 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1146 'upload_date': '20141120',
1147 'uploader': 'The Cinematic Orchestra - Topic',
1148 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1149 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1150 'artist': 'The Cinematic Orchestra',
1151 'track': 'Burn Out',
1152 'album': 'Every Day',
1153 'release_data': None,
1154 'release_year': None,
1155 },
1156 'params': {
1157 'skip_download': True,
1158 },
545cc85d 1159 },
2eb88d95
PH
1160 ]
1161
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with empty per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions; _decrypt_signature keys this by
    # (player URL, signature length pattern).
    self._player_cache = {}
    # Downloaded player JavaScript; _extract_signature_function keys this
    # by player id.
    self._code_cache = {}
e0df6211 1166
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature.

    The lengths are themselves joined with dots, so a signature shaped
    like 'abc.de' yields '3.2'.
    """
    part_lengths = [
        compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1170
e40c758c
S
1171 @classmethod
1172 def _extract_player_info(cls, player_url):
1173 for player_re in cls._PLAYER_INFO_RE:
1174 id_m = re.search(player_re, player_url)
1175 if id_m:
1176 break
1177 else:
c081b35c 1178 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1179 return id_m.group('id')
e40c758c
S
1180
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return the signature-transforming function for a player.

    The function is first looked up in the downloader's 'youtube-sigfuncs'
    cache, under a key built from the player id and the length pattern of
    *example_sig*.  On a miss the player JavaScript is downloaded (once per
    player id, see self._code_cache), parsed with _parse_sig_js, and the
    resulting character permutation is stored back into the cache.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    # Sanity check: the cache key must be a bare name without any
    # directory component.
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        # A cached spec is a list of source indices: output character k is
        # input character stored_spec[k].
        def apply_stored_spec(s):
            return ''.join(s[i] for i in stored_spec)
        return apply_stored_spec

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    sig_function = self._parse_sig_js(self._code_cache[player_id])

    # Run the function over a string whose k-th character has code point k;
    # the code points of the result are then exactly the index permutation.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in sig_function(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return sig_function
1207
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string as long as *example_sig* to recover
    the index permutation it performs.  That permutation is rendered as a
    concatenation of ``s[...]`` index/slice expressions, wrapped in an
    ``if`` on the signature's part lengths, and shown via self.to_screen.
    """

    def render_slice(first, last, stride):
        # Slice expression for the run first..last (inclusive) walked in
        # steps of stride.
        stop = last + stride
        head = str(first) if first != 0 else ''
        # A negative stop would be read as "from the end", so leave the
        # bound open instead.
        middle = ':%d' % stop if stop >= 0 else ':'
        tail = ':%d' % stride if stride != 1 else ''
        return 's[%s%s%s]' % (head, middle, tail)

    def render_indices(indices):
        # Walk adjacent pairs, merging consecutive +1 / -1 runs into slices
        # and emitting every other index on its own.
        stride = None
        # Placeholder only - run_start is always set together with stride
        run_start = '(Never used)'
        for current, previous in zip(indices[1:], indices[:-1]):
            delta = current - previous
            if stride is None:
                if delta in (-1, 1):
                    stride = delta
                    run_start = previous
                else:
                    yield 's[%d]' % previous
            elif delta != stride:
                # The run ended at the previous index.
                yield render_slice(run_start, previous, stride)
                stride = None
        # Flush whatever is pending for the final index.
        if stride is None:
            yield 's[%d]' % current
        else:
            yield render_slice(run_start, current, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expression = ' + '.join(render_indices(permutation))
    part_lengths = ', '.join(
        compat_str(len(part)) for part in example_sig.split('.'))
    code = (
        'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
        ' return %s\n') % ('(%s)' % part_lengths, expression)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1246
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build the signature function from the player JavaScript *jscode*.

    The name of the player's signature routine is located with the regexes
    below (tried in order, the 'sig' group holds the name), the routine is
    extracted with JSInterpreter, and a one-argument callable taking the
    signature string is returned.
    """
    # NOTE: the original tuple listed its final pattern twice; the redundant
    # copy is dropped, which cannot change which name is found.
    sig_function_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_function_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
1270
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    *player_url* may be protocol-relative or site-relative; it is made
    absolute before use.  Signature functions are memoised in
    self._player_cache per (player URL, signature length pattern).

    Raises ExtractorError when *player_url* is None, or (wrapping the
    original exception and its traceback) when anything fails while
    obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        # Protocol-relative URL
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        # Site-relative path
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1297
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    playbackTracking.videostatsPlaybackUrl.baseUrl.  Otherwise 'ver' and a
    random 'cpn' are set in the URL's query string and the URL is fetched
    non-fatally.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # "& 63" keeps the low six bits, i.e. an index into the 64-char alphabet.
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1322
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube references embedded in the HTML *webpage*.

    The returned list contains, in this order:
      * HTML-unescaped embed URLs taken from player markup (iframe, embed,
        object, data-video-url, swfobject.embedSWF / new SWFObject calls);
      * HTML-unescaped values of lazyYT ``data-youtube-id`` attributes;
      * ``data-video_id`` values of Wordpress "YouTube Video Importer"
        players.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read "embedSWF\(?:\s*", which demands
    # a literal ':' after the optional '(' and therefore never matched an
    # ordinary embedSWF("...") call.  The ':' is optional now; everything
    # the old pattern matched still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_ref) for video_ref in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; only the id is wanted.
    entries.extend(m[-1] for m in matches)

    return entries
1354
@staticmethod
def _extract_url(webpage):
    """Return the first entry YoutubeIE._extract_urls finds, or None."""
    for first_entry in YoutubeIE._extract_urls(webpage):
        return first_entry
    return None
1359
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id contained in *url*.

    *url* is matched against cls._VALID_URL (compiled in verbose mode) and
    the text captured by its second group is returned.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1367
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the player-bar data inside *data*.

    Returns None when *data* has no non-empty chapter list.  Otherwise
    returns a list of dicts with 'start_time', 'end_time' (both in seconds)
    and 'title'.  A chapter ends where the next one starts; the last one
    ends at *duration*.  Chapters whose start or end time cannot be
    determined are left out.  *video_id* is not used.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(raw_chapter):
        # timeRangeStartMillis is in milliseconds; scale it to seconds.
        millis = try_get(
            raw_chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    chapters = []
    for index, raw_chapter in enumerate(raw_chapters):
        begin = start_seconds(raw_chapter)
        if begin is None:
            continue
        if index + 1 < total:
            end = start_seconds(raw_chapters[index + 1])
        else:
            end = duration
        if end is None:
            continue
        chapters.append({
            'start_time': begin,
            'end_time': end,
            'title': try_get(
                raw_chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1407
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find and parse a JSON value embedded in *webpage*.

    *regex* is tried first with self._YT_INITIAL_BOUNDARY_RE appended
    (separated by optional whitespace), then on its own.  When neither
    matches, '{}' is parsed instead.  JSON parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_text = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
84213ea8 1412
c5e8d7af 1413 def _real_extract(self, url):
cf7e015f 1414 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1415 video_id = self._match_id(url)
1416 base_url = self.http_scheme() + '//www.youtube.com/'
1417 webpage_url = base_url + 'watch?v=' + video_id
1418 webpage = self._download_webpage(webpage_url, video_id, fatal=False)
1419
1420 player_response = None
1421 if webpage:
1422 player_response = self._extract_yt_initial_variable(
1423 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1424 video_id, 'initial player response')
1425 if not player_response:
1426 player_response = self._call_api(
1427 'player', {'videoId': video_id}, video_id)
1428
1429 playability_status = player_response.get('playabilityStatus') or {}
1430 if playability_status.get('reason') == 'Sign in to confirm your age':
1431 pr = self._parse_json(try_get(compat_parse_qs(
1432 self._download_webpage(
1433 base_url + 'get_video_info', video_id,
1434 'Refetching age-gated info webpage',
1435 'unable to download video info webpage', query={
1436 'video_id': video_id,
1437 'eurl': 'https://www.youtube.com/embed/' + video_id,
1438 }, fatal=False)),
1439 lambda x: x['player_response'][0],
1440 compat_str) or '{}', video_id)
1441 if pr:
1442 player_response = pr
1443
1444 trailer_video_id = try_get(
1445 playability_status,
1446 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1447 compat_str)
1448 if trailer_video_id:
1449 return self.url_result(
1450 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1451
545cc85d 1452 def get_text(x):
1453 if not x:
c2d125d9 1454 return
545cc85d 1455 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1456
545cc85d 1457 search_meta = (
1458 lambda x: self._html_search_meta(x, webpage, default=None)) \
1459 if webpage else lambda x: None
dbdaaa23 1460
545cc85d 1461 video_details = player_response.get('videoDetails') or {}
37357d21 1462 microformat = try_get(
545cc85d 1463 player_response,
1464 lambda x: x['microformat']['playerMicroformatRenderer'],
1465 dict) or {}
1466 video_title = video_details.get('title') \
1467 or get_text(microformat.get('title')) \
1468 or search_meta(['og:title', 'twitter:title', 'title'])
1469 video_description = video_details.get('shortDescription')
cf7e015f 1470
8fe10494 1471 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1472 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1473 multifeed_metadata_list = try_get(
1474 player_response,
1475 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1476 compat_str)
8fe10494
S
1477 if multifeed_metadata_list:
1478 entries = []
1479 feed_ids = []
1480 for feed in multifeed_metadata_list.split(','):
1481 # Unquote should take place before split on comma (,) since textual
1482 # fields may contain comma as well (see
067aa17e 1483 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1484 feed_data = compat_parse_qs(
1485 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1486
1487 def feed_entry(name):
545cc85d 1488 return try_get(
1489 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1490
1491 feed_id = feed_entry('id')
1492 if not feed_id:
1493 continue
1494 feed_title = feed_entry('title')
1495 title = video_title
1496 if feed_title:
1497 title += ' (%s)' % feed_title
8fe10494
S
1498 entries.append({
1499 '_type': 'url_transparent',
1500 'ie_key': 'Youtube',
1501 'url': smuggle_url(
545cc85d 1502 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1503 {'force_singlefeed': True}),
6b09401b 1504 'title': title,
8fe10494 1505 })
6b09401b 1506 feed_ids.append(feed_id)
8fe10494
S
1507 self.to_screen(
1508 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1509 % (', '.join(feed_ids), video_id))
545cc85d 1510 return self.playlist_result(
1511 entries, video_id, video_title, video_description)
8fe10494
S
1512 else:
1513 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1514
545cc85d 1515 formats = []
1516 itags = []
cc2db878 1517 itag_qualities = {}
545cc85d 1518 player_url = None
cc2db878 1519 # TODO: Enable this after fixing formatSort
8a784c74 1520 # q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1521 streaming_data = player_response.get('streamingData') or {}
1522 streaming_formats = streaming_data.get('formats') or []
1523 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1524 for fmt in streaming_formats:
1525 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1526 continue
321bf820 1527
cc2db878 1528 itag = str_or_none(fmt.get('itag'))
1529 quality = fmt.get('quality')
1530 if itag and quality:
1531 itag_qualities[itag] = quality
1532 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1533 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1534 # number of fragments that would subsequently be requested with (`&sq=N`)
1535 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1536 continue
1537
545cc85d 1538 fmt_url = fmt.get('url')
1539 if not fmt_url:
1540 sc = compat_parse_qs(fmt.get('signatureCipher'))
1541 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1542 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1543 if not (sc and fmt_url and encrypted_sig):
1544 continue
1545 if not player_url:
1546 if not webpage:
1547 continue
1548 player_url = self._search_regex(
1549 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1550 webpage, 'player URL', fatal=False)
1551 if not player_url:
201e9eaa 1552 continue
545cc85d 1553 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1554 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1555 fmt_url += '&' + sp + '=' + signature
1556
545cc85d 1557 if itag:
1558 itags.append(itag)
cc2db878 1559 tbr = float_or_none(
1560 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1561 dct = {
1562 'asr': int_or_none(fmt.get('audioSampleRate')),
1563 'filesize': int_or_none(fmt.get('contentLength')),
1564 'format_id': itag,
1565 'format_note': fmt.get('qualityLabel') or quality,
1566 'fps': int_or_none(fmt.get('fps')),
1567 'height': int_or_none(fmt.get('height')),
cc2db878 1568 # 'quality': q(quality), # TODO: Enable this after fixing formatSort
1569 'tbr': tbr,
545cc85d 1570 'url': fmt_url,
1571 'width': fmt.get('width'),
1572 }
1573 mimetype = fmt.get('mimeType')
1574 if mimetype:
1575 mobj = re.match(
1576 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1577 if mobj:
1578 dct['ext'] = mimetype2ext(mobj.group(1))
1579 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1580 no_audio = dct.get('acodec') == 'none'
1581 no_video = dct.get('vcodec') == 'none'
1582 if no_audio:
1583 dct['vbr'] = tbr
1584 if no_video:
1585 dct['abr'] = tbr
1586 if no_audio or no_video:
545cc85d 1587 dct['downloader_options'] = {
1588 # Youtube throttles chunks >~10M
1589 'http_chunk_size': 10485760,
bf1317d2 1590 }
545cc85d 1591 formats.append(dct)
1592
1593 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1594 if hls_manifest_url:
1595 for f in self._extract_m3u8_formats(
1596 hls_manifest_url, video_id, 'mp4', fatal=False):
1597 itag = self._search_regex(
1598 r'/itag/(\d+)', f['url'], 'itag', default=None)
1599 if itag:
1600 f['format_id'] = itag
1601 formats.append(f)
1602
1603 if self._downloader.params.get('youtube_include_dash_manifest'):
1604 dash_manifest_url = streaming_data.get('dashManifestUrl')
1605 if dash_manifest_url:
545cc85d 1606 for f in self._extract_mpd_formats(
1607 dash_manifest_url, video_id, fatal=False):
cc2db878 1608 itag = f['format_id']
1609 if itag in itags:
1610 continue
1611 # if itag in itag_qualities: # TODO: Enable this after fixing formatSort
1612 # f['quality'] = q(itag_qualities[itag])
545cc85d 1613 filesize = int_or_none(self._search_regex(
1614 r'/clen/(\d+)', f.get('fragment_base_url')
1615 or f['url'], 'file size', default=None))
1616 if filesize:
1617 f['filesize'] = filesize
cc2db878 1618 formats.append(f)
bf1317d2 1619
545cc85d 1620 if not formats:
1621 if streaming_data.get('licenseInfos'):
1622 raise ExtractorError(
1623 'This video is DRM protected.', expected=True)
1624 pemr = try_get(
1625 playability_status,
1626 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1627 dict) or {}
1628 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1629 subreason = pemr.get('subreason')
1630 if subreason:
1631 subreason = clean_html(get_text(subreason))
1632 if subreason == 'The uploader has not made this video available in your country.':
1633 countries = microformat.get('availableCountries')
1634 if not countries:
1635 regions_allowed = search_meta('regionsAllowed')
1636 countries = regions_allowed.split(',') if regions_allowed else None
1637 self.raise_geo_restricted(
1638 subreason, countries)
1639 reason += '\n' + subreason
1640 if reason:
1641 raise ExtractorError(reason, expected=True)
bf1317d2 1642
545cc85d 1643 self._sort_formats(formats)
bf1317d2 1644
545cc85d 1645 keywords = video_details.get('keywords') or []
1646 if not keywords and webpage:
1647 keywords = [
1648 unescapeHTML(m.group('content'))
1649 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1650 for keyword in keywords:
1651 if keyword.startswith('yt:stretch='):
1652 w, h = keyword.split('=')[1].split(':')
1653 w, h = int(w), int(h)
1654 if w > 0 and h > 0:
1655 ratio = w / h
1656 for f in formats:
1657 if f.get('vcodec') != 'none':
1658 f['stretched_ratio'] = ratio
6449cd80 1659
545cc85d 1660 thumbnails = []
1661 for container in (video_details, microformat):
1662 for thumbnail in (try_get(
1663 container,
1664 lambda x: x['thumbnail']['thumbnails'], list) or []):
1665 thumbnail_url = thumbnail.get('url')
1666 if not thumbnail_url:
bf1317d2 1667 continue
545cc85d 1668 thumbnails.append({
1669 'height': int_or_none(thumbnail.get('height')),
1670 'url': thumbnail_url,
1671 'width': int_or_none(thumbnail.get('width')),
1672 })
1673 if thumbnails:
1674 break
a6211d23 1675 else:
545cc85d 1676 thumbnail = search_meta(['og:image', 'twitter:image'])
1677 if thumbnail:
1678 thumbnails = [{'url': thumbnail}]
1679
1680 category = microformat.get('category') or search_meta('genre')
1681 channel_id = video_details.get('channelId') \
1682 or microformat.get('externalChannelId') \
1683 or search_meta('channelId')
1684 duration = int_or_none(
1685 video_details.get('lengthSeconds')
1686 or microformat.get('lengthSeconds')) \
1687 or parse_duration(search_meta('duration'))
1688 is_live = video_details.get('isLive')
1689 owner_profile_url = microformat.get('ownerProfileUrl')
1690
1691 info = {
1692 'id': video_id,
1693 'title': self._live_title(video_title) if is_live else video_title,
1694 'formats': formats,
1695 'thumbnails': thumbnails,
1696 'description': video_description,
1697 'upload_date': unified_strdate(
1698 microformat.get('uploadDate')
1699 or search_meta('uploadDate')),
1700 'uploader': video_details['author'],
1701 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1702 'uploader_url': owner_profile_url,
1703 'channel_id': channel_id,
1704 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1705 'duration': duration,
1706 'view_count': int_or_none(
1707 video_details.get('viewCount')
1708 or microformat.get('viewCount')
1709 or search_meta('interactionCount')),
1710 'average_rating': float_or_none(video_details.get('averageRating')),
1711 'age_limit': 18 if (
1712 microformat.get('isFamilySafe') is False
1713 or search_meta('isFamilyFriendly') == 'false'
1714 or search_meta('og:restrictions:age') == '18+') else 0,
1715 'webpage_url': webpage_url,
1716 'categories': [category] if category else None,
1717 'tags': keywords,
1718 'is_live': is_live,
1719 'playable_in_embed': playability_status.get('playableInEmbed'),
1720 }
b477fc13 1721
545cc85d 1722 pctr = try_get(
1723 player_response,
1724 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1725 subtitles = {}
1726 if pctr:
def process_language(container, base_url, lang_code, query):
    """Store one subtitle entry per supported format under `lang_code`.

    For every format in self._SUBTITLE_FORMATS the `fmt` key of `query`
    is set (the passed dict is updated in place) and a URL is derived
    from `base_url` with update_url_query. The resulting list of
    {'ext', 'url'} dicts is assigned to container[lang_code].
    """
    def subtitle_entry(sub_format):
        # `query` is deliberately mutated rather than copied so that the
        # caller's dict ends up holding the last processed format.
        query.update({
            'fmt': sub_format,
        })
        return {
            'ext': sub_format,
            'url': update_url_query(base_url, query),
        }

    container[lang_code] = [
        subtitle_entry(sub_format)
        for sub_format in self._SUBTITLE_FORMATS]
7e72694b 1738
545cc85d 1739 for caption_track in (pctr.get('captionTracks') or []):
1740 base_url = caption_track.get('baseUrl')
1741 if not base_url:
1742 continue
1743 if caption_track.get('kind') != 'asr':
1744 lang_code = caption_track.get('languageCode')
1745 if not lang_code:
1746 continue
1747 process_language(
1748 subtitles, base_url, lang_code, {})
1749 continue
1750 automatic_captions = {}
1751 for translation_language in (pctr.get('translationLanguages') or []):
1752 translation_language_code = translation_language.get('languageCode')
1753 if not translation_language_code:
1754 continue
1755 process_language(
1756 automatic_captions, base_url, translation_language_code,
1757 {'tlang': translation_language_code})
1758 info['automatic_captions'] = automatic_captions
1759 info['subtitles'] = subtitles
7e72694b 1760
545cc85d 1761 parsed_url = compat_urllib_parse_urlparse(url)
1762 for component in [parsed_url.fragment, parsed_url.query]:
1763 query = compat_parse_qs(component)
1764 for k, v in query.items():
1765 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1766 d_k += '_time'
1767 if d_k not in info and k in s_ks:
1768 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1769
1770 # Youtube Music Auto-generated description
822b9d9c 1771 if video_description:
38d70284 1772 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 1773 if mobj:
822b9d9c
RA
1774 release_year = mobj.group('release_year')
1775 release_date = mobj.group('release_date')
1776 if release_date:
1777 release_date = release_date.replace('-', '')
1778 if not release_year:
545cc85d 1779 release_year = release_date[:4]
1780 info.update({
1781 'album': mobj.group('album'.strip()),
1782 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
1783 'track': mobj.group('track').strip(),
1784 'release_date': release_date,
cc2db878 1785 'release_year': int_or_none(release_year),
545cc85d 1786 })
7e72694b 1787
545cc85d 1788 initial_data = None
1789 if webpage:
1790 initial_data = self._extract_yt_initial_variable(
1791 webpage, self._YT_INITIAL_DATA_RE, video_id,
1792 'yt initial data')
1793 if not initial_data:
1794 initial_data = self._call_api(
1795 'next', {'videoId': video_id}, video_id, fatal=False)
1796
1797 if not is_live:
1798 try:
1799 # This will error if there is no livechat
1800 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1801 info['subtitles']['live_chat'] = [{
1802 'video_id': video_id,
1803 'ext': 'json',
1804 'protocol': 'youtube_live_chat_replay',
1805 }]
1806 except (KeyError, IndexError, TypeError):
1807 pass
1808
1809 if initial_data:
1810 chapters = self._extract_chapters_from_json(
1811 initial_data, video_id, duration)
1812 if not chapters:
1813 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1814 contents = try_get(
1815 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1816 list)
1817 if not contents:
1818 continue
1819
def chapter_time(mmlir):
    """Return the time of a macro markers list item renderer in seconds.

    `mmlir` is the renderer dict whose 'timeDescription' entry is turned
    into text with get_text and parsed with parse_duration.

    The end time of a chapter is computed by passing the result of a
    try_get() lookup on the following item, which is None when that item
    has no renderer; treat a missing renderer as an empty one so the
    lookup does not fail on None.
    """
    return parse_duration(
        get_text((mmlir or {}).get('timeDescription')))
1823
1824 chapters = []
1825 for next_num, content in enumerate(contents, start=1):
1826 mmlir = content.get('macroMarkersListItemRenderer') or {}
1827 start_time = chapter_time(mmlir)
1828 end_time = chapter_time(try_get(
1829 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1830 if next_num < len(contents) else duration
1831 if start_time is None or end_time is None:
1832 continue
1833 chapters.append({
1834 'start_time': start_time,
1835 'end_time': end_time,
1836 'title': get_text(mmlir.get('title')),
1837 })
1838 if chapters:
1839 break
1840 if chapters:
1841 info['chapters'] = chapters
1842
1843 contents = try_get(
1844 initial_data,
1845 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1846 list) or []
1847 for content in contents:
1848 vpir = content.get('videoPrimaryInfoRenderer')
1849 if vpir:
1850 stl = vpir.get('superTitleLink')
1851 if stl:
1852 stl = get_text(stl)
1853 if try_get(
1854 vpir,
1855 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1856 info['location'] = stl
1857 else:
1858 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1859 if mobj:
1860 info.update({
1861 'series': mobj.group(1),
1862 'season_number': int(mobj.group(2)),
1863 'episode_number': int(mobj.group(3)),
1864 })
1865 for tlb in (try_get(
1866 vpir,
1867 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1868 list) or []):
1869 tbr = tlb.get('toggleButtonRenderer') or {}
1870 for getter, regex in [(
1871 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1872 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1873 lambda x: x['accessibility'],
1874 lambda x: x['accessibilityData']['accessibilityData'],
1875 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1876 label = (try_get(tbr, getter, dict) or {}).get('label')
1877 if label:
1878 mobj = re.match(regex, label)
1879 if mobj:
1880 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1881 break
1882 sbr_tooltip = try_get(
1883 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1884 if sbr_tooltip:
1885 like_count, dislike_count = sbr_tooltip.split(' / ')
1886 info.update({
1887 'like_count': str_to_int(like_count),
1888 'dislike_count': str_to_int(dislike_count),
1889 })
1890 vsir = content.get('videoSecondaryInfoRenderer')
1891 if vsir:
1892 info['channel'] = get_text(try_get(
1893 vsir,
1894 lambda x: x['owner']['videoOwnerRenderer']['title'],
1895 compat_str))
1896 rows = try_get(
1897 vsir,
1898 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1899 list) or []
1900 multiple_songs = False
1901 for row in rows:
1902 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1903 multiple_songs = True
1904 break
1905 for row in rows:
1906 mrr = row.get('metadataRowRenderer') or {}
1907 mrr_title = mrr.get('title')
1908 if not mrr_title:
1909 continue
1910 mrr_title = get_text(mrr['title'])
1911 mrr_contents_text = get_text(mrr['contents'][0])
1912 if mrr_title == 'License':
1913 info['license'] = mrr_contents_text
1914 elif not multiple_songs:
1915 if mrr_title == 'Album':
1916 info['album'] = mrr_contents_text
1917 elif mrr_title == 'Artist':
1918 info['artist'] = mrr_contents_text
1919 elif mrr_title == 'Song':
1920 info['track'] = mrr_contents_text
1921
1922 fallbacks = {
1923 'channel': 'uploader',
1924 'channel_id': 'uploader_id',
1925 'channel_url': 'uploader_url',
1926 }
1927 for to, frm in fallbacks.items():
1928 if not info.get(to):
1929 info[to] = info.get(frm)
1930
1931 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1932 v = info.get(s_k)
1933 if v:
1934 info[d_k] = v
b84071c0 1935
06167fbb 1936 # get xsrf for annotations or comments
1937 get_annotations = self._downloader.params.get('writeannotations', False)
1938 get_comments = self._downloader.params.get('getcomments', False)
1939 if get_annotations or get_comments:
29f7c58a 1940 xsrf_token = None
545cc85d 1941 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 1942 if ytcfg:
1943 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
1944 if not xsrf_token:
1945 xsrf_token = self._search_regex(
1946 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 1947 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 1948
1949 # annotations
06167fbb 1950 if get_annotations:
64b6a4e9
RA
1951 invideo_url = try_get(
1952 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
1953 if xsrf_token and invideo_url:
29f7c58a 1954 xsrf_field_name = None
1955 if ytcfg:
1956 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
1957 if not xsrf_field_name:
1958 xsrf_field_name = self._search_regex(
1959 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 1960 webpage, 'xsrf field name',
29f7c58a 1961 group='xsrf_field_name', default='session_token')
8a784c74 1962 info['annotations'] = self._download_webpage(
64b6a4e9
RA
1963 self._proto_relative_url(invideo_url),
1964 video_id, note='Downloading annotations',
1965 errnote='Unable to download video annotations', fatal=False,
1966 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 1967
06167fbb 1968 # Get comments
1969 # TODO: Refactor and move to separate function
1970 if get_comments:
1971 expected_video_comment_count = 0
1972 video_comments = []
1973
def find_value(html, key, num_chars=2, separator='"'):
    """Extract the text that follows `key` in `html`.

    The value starts `num_chars` characters after the end of the first
    occurrence of `key` (by default skipping e.g. `="`) and runs up to,
    but not including, the next occurrence of `separator`.

    Returns an empty string when `key` does not occur in `html`, and the
    whole remainder of `html` when `separator` is not found after the
    start of the value.
    """
    key_pos = html.find(key)
    if key_pos == -1:
        # str.find signals "not found" with -1; using it in the offset
        # arithmetic would slice an unrelated part of the document.
        return ''
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end == -1:
        # Without a terminating separator a -1 slice end would silently
        # drop the last character, so return everything that is left.
        return html[pos_begin:]
    return html[pos_begin:pos_end]
1978
def search_dict(partial, key):
    """Lazily yield every value stored under `key` in a nested structure.

    Dicts and lists are walked depth-first in iteration order. A value
    found under `key` is yielded as is and not searched any further;
    values under other keys and list items are descended into. Anything
    that is neither a dict nor a list is ignored.
    """
    # Each frame is an iterator of (is_match, value) pairs for one
    # container; the innermost container being walked is on top.
    frames = [iter([(False, partial)])]
    while frames:
        entry = next(frames[-1], None)
        if entry is None:
            # Current container is exhausted, resume its parent
            frames.pop()
            continue
        is_match, value = entry
        if is_match:
            yield value
        elif isinstance(value, dict):
            frames.append(((k == key, v) for k, v in value.items()))
        elif isinstance(value, list):
            frames.append(((False, item) for item in value))
1991
8a784c74 1992 continuations = []
1993 if initial_data:
1994 try:
1995 ncd = next(search_dict(initial_data, 'nextContinuationData'))
1996 continuations = [ncd['continuation']]
1997 # Handle videos where comments have been disabled entirely
1998 except StopIteration:
1999 pass
06167fbb 2000
def get_continuation(continuation, session_token, replies=False):
    """Request the data behind a comment continuation token.

    POSTs `session_token` to the comment_service_ajax endpoint with
    `continuation` as the `ctoken` query parameter; `replies` selects
    the `action_get_comment_replies` action instead of
    `action_get_comments`.

    Returns the parsed JSON response on HTTP 200 and None on HTTP 413
    (which is passed as an expected status to the download call).
    Raises ExtractorError for any other status code.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    query = {
        'pbj': 1,
        'ctoken': continuation,
        action: 1,
    }
    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01',
    }

    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=urlencode_postdata({'session_token': session_token}),
        query=query,
        headers=request_headers)

    response_code = handle.getcode()
    if response_code == 413:
        return None
    if response_code != 200:
        raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
    return self._parse_json(content, video_id)
2035
2036 first_continuation = True
885d36d4 2037 chain_msg = ''
2038 self.to_screen('Downloading comments')
06167fbb 2039 while continuations:
885d36d4 2040 continuation = continuations.pop()
8d0ea5f9 2041 comment_response = get_continuation(continuation, xsrf_token)
06167fbb 2042 if not comment_response:
2043 continue
2044 if list(search_dict(comment_response, 'externalErrorMessage')):
2045 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2046
8d0ea5f9
B
2047 if 'continuationContents' not in comment_response['response']:
2048 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2049 continue
2050 # not sure if this actually helps
2051 if 'xsrf_token' in comment_response:
2052 xsrf_token = comment_response['xsrf_token']
2053
06167fbb 2054 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2055 if first_continuation:
2056 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2057 first_continuation = False
2058 if 'contents' not in item_section:
2059 # continuation returned no comments?
2060 # set an empty list so as not to break the for loop
2061 item_section['contents'] = []
2062
2063 for meta_comment in item_section['contents']:
2064 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2065 video_comments.append({
2066 'id': comment['commentId'],
2067 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
8d0ea5f9 2068 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2069 'author': comment.get('authorText', {}).get('simpleText', ''),
2070 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2071 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2072 'parent': 'root'
2073 })
2074 if 'replies' not in meta_comment['commentThreadRenderer']:
2075 continue
2076
8d0ea5f9
B
2077 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2078 while reply_continuations:
06167fbb 2079 time.sleep(1)
8d0ea5f9
B
2080 continuation = reply_continuations.pop()
2081 replies_data = get_continuation(continuation, xsrf_token, True)
06167fbb 2082 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2083 continue
06167fbb 2084
2085 if self._downloader.params.get('verbose', False):
885d36d4 2086 chain_msg = ' (chain %s)' % comment['commentId']
2087 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2088 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2089 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2090 reply_comment = reply_meta['commentRenderer']
2091 video_comments.append({
2092 'id': reply_comment['commentId'],
2093 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2094 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2095 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2096 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2097 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2098 'parent': comment['commentId']
2099 })
2100 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2101 continue
8d0ea5f9 2102 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2103
885d36d4 2104 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2105 if 'continuations' in item_section:
8d0ea5f9 2106 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2107 time.sleep(1)
2108
885d36d4 2109 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
545cc85d 2110 info.update({
2111 'comments': video_comments,
2112 'comment_count': expected_video_comment_count
2113 })
4ea3be0a 2114
545cc85d 2115 self.mark_watched(video_id, player_response)
d77ab8e2 2116
545cc85d 2117 return info
c5e8d7af 2118
5f6a1245 2119
8bdd16b4 2120class YoutubeTabIE(YoutubeBaseInfoExtractor):
2121 IE_DESC = 'YouTube.com tab'
70d5c17b 2122 _VALID_URL = r'''(?x)
2123 https?://
2124 (?:\w+\.)?
2125 (?:
2126 youtube(?:kids)?\.com|
2127 invidio\.us
2128 )/
2129 (?:
2130 (?:channel|c|user)/|
2131 (?P<not_channel>
3d3dddc9 2132 feed/|
70d5c17b 2133 (?:playlist|watch)\?.*?\blist=
2134 )|
29f7c58a 2135 (?!(?:%s)\b) # Direct URLs
70d5c17b 2136 )
2137 (?P<id>[^/?\#&]+)
2138 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2139 IE_NAME = 'youtube:tab'
2140
81127aa5 2141 _TESTS = [{
8bdd16b4 2142 # playlists, multipage
2143 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2144 'playlist_mincount': 94,
2145 'info_dict': {
2146 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2147 'title': 'Игорь Клейнер - Playlists',
2148 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2149 'uploader': 'Игорь Клейнер',
2150 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2151 },
2152 }, {
2153 # playlists, multipage, different order
2154 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2155 'playlist_mincount': 94,
2156 'info_dict': {
2157 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2158 'title': 'Игорь Клейнер - Playlists',
2159 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2160 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2161 'uploader': 'Игорь Клейнер',
8bdd16b4 2162 },
2163 }, {
2164 # playlists, singlepage
2165 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2166 'playlist_mincount': 4,
2167 'info_dict': {
2168 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2169 'title': 'ThirstForScience - Playlists',
2170 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2171 'uploader': 'ThirstForScience',
2172 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2173 }
2174 }, {
2175 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2176 'only_matching': True,
2177 }, {
2178 # basic, single video playlist
0e30a7b9 2179 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2180 'info_dict': {
0e30a7b9 2181 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2182 'uploader': 'Sergey M.',
2183 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2184 'title': 'youtube-dl public playlist',
81127aa5 2185 },
0e30a7b9 2186 'playlist_count': 1,
9291475f 2187 }, {
8bdd16b4 2188 # empty playlist
0e30a7b9 2189 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2190 'info_dict': {
0e30a7b9 2191 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2192 'uploader': 'Sergey M.',
2193 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2194 'title': 'youtube-dl empty playlist',
9291475f
PH
2195 },
2196 'playlist_count': 0,
2197 }, {
8bdd16b4 2198 # Home tab
2199 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2200 'info_dict': {
8bdd16b4 2201 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2202 'title': 'lex will - Home',
2203 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2204 'uploader': 'lex will',
2205 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2206 },
8bdd16b4 2207 'playlist_mincount': 2,
9291475f 2208 }, {
8bdd16b4 2209 # Videos tab
2210 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2211 'info_dict': {
8bdd16b4 2212 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2213 'title': 'lex will - Videos',
2214 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2215 'uploader': 'lex will',
2216 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2217 },
8bdd16b4 2218 'playlist_mincount': 975,
9291475f 2219 }, {
8bdd16b4 2220 # Videos tab, sorted by popular
2221 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2222 'info_dict': {
8bdd16b4 2223 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2224 'title': 'lex will - Videos',
2225 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2226 'uploader': 'lex will',
2227 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2228 },
8bdd16b4 2229 'playlist_mincount': 199,
9291475f 2230 }, {
8bdd16b4 2231 # Playlists tab
2232 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2233 'info_dict': {
8bdd16b4 2234 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2235 'title': 'lex will - Playlists',
2236 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2237 'uploader': 'lex will',
2238 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2239 },
8bdd16b4 2240 'playlist_mincount': 17,
ac7553d0 2241 }, {
8bdd16b4 2242 # Community tab
2243 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2244 'info_dict': {
8bdd16b4 2245 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2246 'title': 'lex will - Community',
2247 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2248 'uploader': 'lex will',
2249 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2250 },
2251 'playlist_mincount': 18,
87dadd45 2252 }, {
8bdd16b4 2253 # Channels tab
2254 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2255 'info_dict': {
8bdd16b4 2256 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2257 'title': 'lex will - Channels',
2258 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2259 'uploader': 'lex will',
2260 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2261 },
deaec5af 2262 'playlist_mincount': 12,
6b08cdf6 2263 }, {
a0566bbf 2264 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2265 'only_matching': True,
2266 }, {
a0566bbf 2267 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2268 'only_matching': True,
2269 }, {
a0566bbf 2270 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2271 'only_matching': True,
2272 }, {
2273 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2274 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2275 'info_dict': {
2276 'title': '29C3: Not my department',
2277 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2278 'uploader': 'Christiaan008',
2279 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2280 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2281 },
2282 'playlist_count': 96,
2283 }, {
2284 'note': 'Large playlist',
2285 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2286 'info_dict': {
8bdd16b4 2287 'title': 'Uploads from Cauchemar',
2288 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2289 'uploader': 'Cauchemar',
2290 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2291 },
8bdd16b4 2292 'playlist_mincount': 1123,
2293 }, {
2294 # even larger playlist, 8832 videos
2295 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2296 'only_matching': True,
4b7df0d3
JMF
2297 }, {
2298 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2299 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2300 'info_dict': {
acf757f4
PH
2301 'title': 'Uploads from Interstellar Movie',
2302 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2303 'uploader': 'Interstellar Movie',
8bdd16b4 2304 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2305 },
481cc733 2306 'playlist_mincount': 21,
8bdd16b4 2307 }, {
2308 # https://github.com/ytdl-org/youtube-dl/issues/21844
2309 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2310 'info_dict': {
2311 'title': 'Data Analysis with Dr Mike Pound',
2312 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2313 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2314 'uploader': 'Computerphile',
deaec5af 2315 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2316 },
2317 'playlist_mincount': 11,
2318 }, {
a0566bbf 2319 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2320 'only_matching': True,
dacb3a86
S
2321 }, {
2322 # Playlist URL that does not actually serve a playlist
2323 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2324 'info_dict': {
2325 'id': 'FqZTN594JQw',
2326 'ext': 'webm',
2327 'title': "Smiley's People 01 detective, Adventure Series, Action",
2328 'uploader': 'STREEM',
2329 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2330 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2331 'upload_date': '20150526',
2332 'license': 'Standard YouTube License',
2333 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2334 'categories': ['People & Blogs'],
2335 'tags': list,
dbdaaa23 2336 'view_count': int,
dacb3a86
S
2337 'like_count': int,
2338 'dislike_count': int,
2339 },
2340 'params': {
2341 'skip_download': True,
2342 },
13a75688 2343 'skip': 'This video is not available.',
dacb3a86 2344 'add_ie': [YoutubeIE.ie_key()],
481cc733 2345 }, {
8bdd16b4 2346 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2347 'only_matching': True,
66b48727 2348 }, {
8bdd16b4 2349 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2350 'only_matching': True,
a0566bbf 2351 }, {
2352 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2353 'info_dict': {
2354 'id': '9Auq9mYxFEE',
2355 'ext': 'mp4',
deaec5af 2356 'title': compat_str,
a0566bbf 2357 'uploader': 'Sky News',
2358 'uploader_id': 'skynews',
2359 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2360 'upload_date': '20191102',
deaec5af 2361 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2362 'categories': ['News & Politics'],
2363 'tags': list,
2364 'like_count': int,
2365 'dislike_count': int,
2366 },
2367 'params': {
2368 'skip_download': True,
2369 },
2370 }, {
2371 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2372 'info_dict': {
2373 'id': 'a48o2S1cPoo',
2374 'ext': 'mp4',
2375 'title': 'The Young Turks - Live Main Show',
2376 'uploader': 'The Young Turks',
2377 'uploader_id': 'TheYoungTurks',
2378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2379 'upload_date': '20150715',
2380 'license': 'Standard YouTube License',
2381 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2382 'categories': ['News & Politics'],
2383 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2384 'like_count': int,
2385 'dislike_count': int,
2386 },
2387 'params': {
2388 'skip_download': True,
2389 },
2390 'only_matching': True,
2391 }, {
2392 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2393 'only_matching': True,
2394 }, {
2395 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2396 'only_matching': True,
3d3dddc9 2397 }, {
2398 'url': 'https://www.youtube.com/feed/trending',
2399 'only_matching': True,
2400 }, {
2401 # needs auth
2402 'url': 'https://www.youtube.com/feed/library',
2403 'only_matching': True,
2404 }, {
2405 # needs auth
2406 'url': 'https://www.youtube.com/feed/history',
2407 'only_matching': True,
2408 }, {
2409 # needs auth
2410 'url': 'https://www.youtube.com/feed/subscriptions',
2411 'only_matching': True,
2412 }, {
2413 # needs auth
2414 'url': 'https://www.youtube.com/feed/watch_later',
2415 'only_matching': True,
2416 }, {
2417 # no longer available?
2418 'url': 'https://www.youtube.com/feed/recommended',
2419 'only_matching': True,
29f7c58a 2420 }, {
2421 # inline playlist with not always working continuations
2422 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2423 'only_matching': True,
2424 }, {
2425 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2426 'only_matching': True,
2427 }, {
2428 'url': 'https://www.youtube.com/course',
2429 'only_matching': True,
2430 }, {
2431 'url': 'https://www.youtube.com/zsecurity',
2432 'only_matching': True,
2433 }, {
2434 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2435 'only_matching': True,
2436 }, {
2437 'url': 'https://www.youtube.com/TheYoungTurks/live',
2438 'only_matching': True,
2439 }]
2440
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; for everything else
    the decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2445
2446 def _extract_channel_id(self, webpage):
2447 channel_id = self._html_search_meta(
2448 'channelId', webpage, 'channel id', default=None)
2449 if channel_id:
2450 return channel_id
2451 channel_url = self._html_search_meta(
2452 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2453 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2454 'twitter:app:url:googleplay'), webpage, 'channel url')
2455 return self._search_regex(
2456 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2457 channel_url, 'channel id')
15f6397c 2458
8bdd16b4 2459 @staticmethod
2460 def _extract_grid_item_renderer(item):
2461 for item_kind in ('Playlist', 'Video', 'Channel'):
2462 renderer = item.get('grid%sRenderer' % item_kind)
2463 if renderer:
2464 return renderer
2465
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    Non-dict items and items without a grid renderer are ignored. For
    each renderer, a ``playlistId`` yields a playlist URL result, a
    ``videoId`` yields ``self._extract_video(renderer)`` and a
    ``channelId`` yields a channel URL result; the three checks are
    independent of each other.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        # Playlists carry their title in the first "run".
        runs_title = try_get(
            item_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=runs_title)

        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)

        channel_id = item_renderer.get('channelId')
        if channel_id:
            # Channels use a plain "simpleText" title instead of runs.
            simple_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2494
3d3dddc9 2495 def _shelf_entries_from_content(self, shelf_renderer):
2496 content = shelf_renderer.get('content')
2497 if not isinstance(content, dict):
8bdd16b4 2498 return
3d3dddc9 2499 renderer = content.get('gridRenderer')
2500 if renderer:
2501 # TODO: add support for nested playlists so each shelf is processed
2502 # as separate playlist
2503 # TODO: this includes only first N items
2504 for entry in self._grid_entries(renderer):
2505 yield entry
2506 renderer = content.get('horizontalListRenderer')
2507 if renderer:
2508 # TODO
2509 pass
8bdd16b4 2510
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is
    yielded first (titled with the first title run). Afterwards the
    entries of the inline content are yielded as well. With
    *skip_channels* set, a shelf whose URL contains ``/channels?``
    produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 2528
8bdd16b4 2529 def _playlist_entries(self, video_list_renderer):
2530 for content in video_list_renderer['contents']:
2531 if not isinstance(content, dict):
2532 continue
2533 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2534 if not isinstance(renderer, dict):
2535 continue
2536 video_id = renderer.get('videoId')
2537 if not video_id:
2538 continue
2539 yield self._extract_video(renderer)
07aeced6 2540
3d3dddc9 2541 r""" # Not needed in the new implementation
3462ffa8 2542 def _itemSection_entries(self, item_sect_renderer):
2543 for content in item_sect_renderer['contents']:
2544 if not isinstance(content, dict):
2545 continue
2546 renderer = content.get('videoRenderer', {})
2547 if not isinstance(renderer, dict):
2548 continue
2549 video_id = renderer.get('videoId')
2550 if not video_id:
2551 continue
2552 yield self._extract_video(renderer)
3d3dddc9 2553 """
3462ffa8 2554
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``rich_grid_renderer['content']``, if any.

    Nothing is yielded unless ``content.videoRenderer`` is a dict with a
    truthy ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2562
8bdd16b4 2563 def _video_entry(self, video_renderer):
2564 video_id = video_renderer.get('videoId')
2565 if video_id:
2566 return self._extract_video(video_renderer)
dacb3a86 2567
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (``backstageAttachment.videoRenderer``) is yielded
    first when present. Then every link in the post text that YoutubeIE
    can handle is yielded as a URL result, except links pointing at the
    attached video itself.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment's ID so the same video linked in the text is
    # not emitted a second time.
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Tag the result with the linked video's own ID.
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2596
8bdd16b4 2597 def _post_thread_continuation_entries(self, post_thread_continuation):
2598 contents = post_thread_continuation.get('contents')
2599 if not isinstance(contents, list):
2600 return
2601 for content in contents:
2602 renderer = content.get('backstagePostThreadRenderer')
2603 if not isinstance(renderer, dict):
2604 continue
2605 for entry in self._post_thread_entries(renderer):
2606 yield entry
07aeced6 2607
29f7c58a 2608 @staticmethod
2609 def _build_continuation_query(continuation, ctp=None):
2610 query = {
2611 'ctoken': continuation,
2612 'continuation': continuation,
2613 }
2614 if ctp:
2615 query['itct'] = ctp
2616 return query
2617
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    Looks at ``renderer['continuations'][0]['nextContinuationData']`` and
    returns None when that dict or its ``continuation`` token is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 2629
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first. Otherwise the children listed
    under ``contents`` and then ``items`` are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    children = itertools.chain(
        try_get(renderer, lambda x: x['contents'], list) or [],
        try_get(renderer, lambda x: x['items'], list) or [])
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2652
def _entries(self, tab, identity_token):
    """Yield every entry of *tab*, following continuation pages.

    The entries embedded in ``tab['content']`` (a ``sectionListRenderer``
    or ``richGridRenderer``) are yielded first. Further pages are then
    fetched from ``https://www.youtube.com/browse_ajax`` for as long as a
    continuation query can be extracted. A truthy *identity_token* is sent
    as the ``x-youtube-identity-token`` header.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields entries of one section list / rich grid and records the
        # continuation query it finds in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # No item section: the only other supported child is a rich item.
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Dispatch table: renderer key -> callable returning an iterable of entries.
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of each item is processed.
                    break

            # Fall back to progressively outer renderers for the continuation.
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                # Retry only on HTTP 500/503, at most `retries` times;
                # anything else (or the last failed retry) is re-raised.
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        # First response layout: entries live under 'continuationContents'.
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the slot that extract_entries writes to before reusing it.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: items appended through
        # 'onResponseReceivedActions'. The key of the first item selects the
        # handler and the key under which the item list is wrapped for it.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (self._playlist_entries, 'contents'),
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither layout was recognised: stop paging.
        break
9558dcec 2782
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    selected = next(
        (tab for tab in tabs
         if try_get(tab, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 2790
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict with any of ``uploader``, ``uploader_id`` and
    ``uploader_url`` that could be read from the video owner of a
    ``playlistSidebarSecondaryInfoRenderer``; keys whose value is None
    are left out.
    """
    uploader = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        uploader.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
        })
    return dict((key, value) for key, value in uploader.items() if value is not None)
8bdd16b4 2813
2814 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
b60419c5 2815 playlist_id = title = description = channel_url = channel_name = channel_id = None
2816 thumbnails_list = tags = []
2817
8bdd16b4 2818 selected_tab = self._extract_selected_tab(tabs)
2819 renderer = try_get(
2820 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
2821 if renderer:
b60419c5 2822 channel_name = renderer.get('title')
2823 channel_url = renderer.get('channelUrl')
2824 channel_id = renderer.get('externalId')
64c0d954 2825
64c0d954 2826 if not renderer:
2827 renderer = try_get(
2828 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
8bdd16b4 2829 if renderer:
2830 title = renderer.get('title')
ecc97af3 2831 description = renderer.get('description', '')
b60419c5 2832 playlist_id = channel_id
2833 tags = renderer.get('keywords', '').split()
2834 thumbnails_list = (
2835 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
ff84930c 2836 or try_get(
2837 data,
2838 lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
2839 list)
b60419c5 2840 or [])
2841
2842 thumbnails = []
2843 for t in thumbnails_list:
2844 if not isinstance(t, dict):
2845 continue
2846 thumbnail_url = url_or_none(t.get('url'))
2847 if not thumbnail_url:
2848 continue
2849 thumbnails.append({
2850 'url': thumbnail_url,
2851 'width': int_or_none(t.get('width')),
2852 'height': int_or_none(t.get('height')),
2853 })
64c0d954 2854
3462ffa8 2855 if playlist_id is None:
70d5c17b 2856 playlist_id = item_id
2857 if title is None:
b60419c5 2858 title = playlist_id
2859 title += format_field(selected_tab, 'title', ' - %s')
2860
2861 metadata = {
2862 'playlist_id': playlist_id,
2863 'playlist_title': title,
2864 'playlist_description': description,
2865 'uploader': channel_name,
2866 'uploader_id': channel_id,
2867 'uploader_url': channel_url,
2868 'thumbnails': thumbnails,
2869 'tags': tags,
2870 }
2871 if not channel_id:
2872 metadata.update(self._extract_uploader(data))
2873 metadata.update({
2874 'channel': metadata['uploader'],
2875 'channel_id': metadata['uploader_id'],
2876 'channel_url': metadata['uploader_url']})
2877 return self.playlist_result(
29f7c58a 2878 self._entries(selected_tab, identity_token),
b60419c5 2879 **metadata)
73c4ac2c 2880
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Return a result for a playlist rendered inline on a watch page.

    When the playlist advertises its own URL and that URL differs from
    *url*, extraction is delegated to it through a URL result. Otherwise
    the inline items are returned as a playlist result.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=playlist_title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 2898
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs found in ``data['alerts']``.

    For each alert that has a ``type``, the ``text.simpleText`` message is
    yielded first (if any), followed by the text of each entry in
    ``text.runs``.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            alert_type = alert.get('type')
            if not alert_type:
                continue
            simple_message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if simple_message:
                yield alert_type, simple_message
            for text_run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                run_message = try_get(text_run, lambda x: x['text'], compat_str)
                if run_message:
                    yield alert_type, run_message
2916
def _extract_identity_token(self, webpage, item_id):
    """Return the ``ID_TOKEN`` value of *webpage*, or None.

    The parsed ytcfg is consulted first; if it yields no token the raw
    page is searched with a regular expression.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
2926
def _real_extract(self, url):
    """Extract a tab page (channel, playlist, feed, ...) from *url*.

    The host is normalised to www.youtube.com, bare channel/user URLs are
    redirected to their ``/videos`` tab, and the initial page data is then
    dispatched to tab extraction, inline playlist extraction, or a plain
    video result, in that order. Raises ExtractorError when YouTube
    reports an error alert or when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    # Force the canonical host regardless of the one given.
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # Matches when the part accepted by _VALID_URL is followed only by an
    # optional '/' and then the end, a query or a fragment.
    # NOTE(review): the 'not_channel' group comes from _VALID_URL, which is
    # defined outside this block.
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    # A /watch URL without a video ID is only usable if it names a playlist.
    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    # Non-error alerts are reported as warnings. Of several error alerts,
    # the last one is raised and the earlier ones are downgraded to warnings.
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() == 'error':
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 2986
c5e8d7af 2987
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and playlist-bearing URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject anything YoutubeTabIE already accepts; otherwise defer to the parent."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical ``/playlist`` URL handled by YoutubeTabIE.

        The query string of *url* is carried over unchanged; when there is
        none (e.g. a bare playlist ID), ``list=<id>`` is used instead.
        """
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3062
3063
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist ID."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL and delegate to YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3102
3103
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's YouTube page."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/user/<name>`` URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3117
b05654f0 3118
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of keywords to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3136
3137
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword.

    Subclasses may set ``_SEARCH_PARAMS`` to have it sent as the
    ``params`` field of the search request.
    """

    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*.

        Result pages are requested one after another from the
        ``youtubei/v1/search`` endpoint. Iteration stops once *n* videos
        were yielded, a page fails to download, a page has no recognisable
        result list, or no continuation token is found.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page and continuation pages use different layouts.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Look for the token before the item-section check below:
                # the continuationItemRenderer is a sibling of the
                # itemSectionRenderer sections and has no section contents,
                # so it would otherwise be skipped by the `continue`.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3218
c9ae7b95 3219
class YoutubeSearchDateIE(YoutubeSearchIE):
    """YoutubeSearchIE variant bound to the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # NOTE(review): presumably the "sort by upload date" filter value,
    # going by IE_DESC - confirm against the search endpoint.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3225
c9ae7b95 3226
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL instead of a keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the plain URL pattern declared on this class as-is.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the optional 'sp' value from url, then search."""
        parsed_url = compat_urllib_parse_urlparse(url)
        url_params = compat_parse_qs(parsed_url.query)
        # 'search_query' takes precedence over 'q' when both are present.
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        # Forward the 'sp' value of the URL (empty string when absent) to
        # the search request built by the parent class.
        sp_values = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3252
3253
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed keyword extractors.
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the feed name, so subclasses need not declare it.
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # The matched URL is ignored; the target depends only on the feed name.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3274
3275
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword onto the "WL" playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Delegate to YoutubeTabIE; the keyword itself carries no data.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3288
3289
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` / ``:ytrecommended`` and the bare youtube.com URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # First alternative: the site root, optionally followed by '/', '?' or '#'.
    # Second alternative: the keyword form.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 3304
1ed5b5c9 3305
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword family."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 3317
1ed5b5c9 3318
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythistory`` keyword."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
3327
3328
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch URLs that lack a video ID and explain the likely cause.

    Such URLs typically result from an unquoted '&' on the command line;
    extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?feature=foo', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?hl=en-GB', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?t=2372', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Always fails: a URL matching _VALID_URL has no video ID to extract.
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3376
3377
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v=`` value has only 1 to 10 ID characters and report them."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Always fails: report the partial ID together with the offending URL.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)