]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
Option `--windows-filenames` to force use of windows compatible filenames
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
d77ab8e2 8import random
c5e8d7af 9import re
8a784c74 10import time
e0df6211 11import traceback
c5e8d7af 12
b05654f0 13from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 14from ..compat import (
edf3e38e 15 compat_chr,
29f7c58a 16 compat_HTTPError,
8d81f3e3 17 compat_kwargs,
c5e8d7af 18 compat_parse_qs,
545cc85d 19 compat_str,
7fd002c0 20 compat_urllib_parse_unquote_plus,
15707c7e 21 compat_urllib_parse_urlencode,
7c80519c 22 compat_urllib_parse_urlparse,
7c61bd36 23 compat_urlparse,
4bb4a188 24)
545cc85d 25from ..jsinterp import JSInterpreter
4bb4a188 26from ..utils import (
c5e8d7af 27 clean_html,
c5e8d7af 28 ExtractorError,
b60419c5 29 format_field,
2d30521a 30 float_or_none,
dd27fd17 31 int_or_none,
94278f72 32 mimetype2ext,
6310acf5 33 parse_codecs,
7c80519c 34 parse_duration,
cc2db878 35 # qualities, # TODO: Enable this after fixing formatSort
3995d37d 36 remove_start,
cf7e015f 37 smuggle_url,
dbdaaa23 38 str_or_none,
c93d53f5 39 str_to_int,
556dbe7f 40 try_get,
c5e8d7af
PH
41 unescapeHTML,
42 unified_strdate,
cf7e015f 43 unsmuggle_url,
8bdd16b4 44 update_url_query,
21c340b8 45 url_or_none,
6e6bc8da 46 urlencode_postdata,
8bdd16b4 47 urljoin,
c5e8d7af
PH
48)
49
5f6a1245 50
de7f3446 51class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
52 """Provide base functions for Youtube extractors"""
53 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 54 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
55
56 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
57 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
58 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 59
3462ffa8 60 _RESERVED_NAMES = (
29f7c58a 61 r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
62 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
63 r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
3462ffa8 64
b2e8bc1b
JMF
65 _NETRC_MACHINE = 'youtube'
66 # If True it will raise an error if no login info is provided
67 _LOGIN_REQUIRED = False
68
70d5c17b 69 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 70
25f14e9f
S
def _ids_to_results(self, ids):
    """Return a list with one url_result entry per video ID in *ids*.

    Each entry is built by self.url_result with the ID used both as the
    URL and as the video_id, and 'Youtube' as the extractor key.
    """
    results = []
    for video_id in ids:
        results.append(
            self.url_result(video_id, 'Youtube', video_id=video_id))
    return results
75
def _login(self):
    """
    Attempt to log in to YouTube.

    Returns True if the login succeeded or was skipped because no
    username is available, and False if any step of the login flow failed.

    If _LOGIN_REQUIRED is set and neither login info nor a cookie file
    was provided, an ExtractorError is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # TODO: when a cookie file is in use, remind the user to keep the
        # cookies up to date (outdated cookies are a common source of issues).
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Report failure explicitly instead of falling through with None
        return False

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        """POST the login form plus the JSON-encoded *f_req* payload to *url*.

        Returns the parsed JSON response, or False when the request fails
        (the download is non-fatal).
        """
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything in front of the first '[' so that the
            # remainder can be parsed as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    # The same ServiceLogin URL is embedded in both the lookup and the
    # challenge request payloads
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        # Report failure explicitly instead of falling through with None
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # The conditional must be parenthesised: '%' binds tighter than
        # 'if ... else', so without the parentheses any message other than
        # INCORRECT_ANSWER_ENTERED would be reported without the prefix.
        warn('Unable to login: %s' % (
            'Invalid password'
            if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Strip a leading 'G-' from the code if present
            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Parenthesised for the same precedence reason as above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code'
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login is only treated as successful if the CheckCookie page
    # mentions the Google account page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
260
def _download_webpage_handle(self, *args, **kwargs):
    """Delegate to the parent implementation with a private copy of 'query'.

    The 'query' keyword argument (an empty dict when absent) is copied
    before being forwarded, so the caller's dict is never shared with the
    parent call.
    """
    parent = super(YoutubeBaseInfoExtractor, self)
    kwargs['query'] = kwargs.get('query', {}).copy()
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
266
b2e8bc1b
JMF
def _real_initialize(self):
    """Attempt to log in, provided a downloader is attached.

    The outcome of the login attempt is not propagated; this method
    always returns None.
    """
    if self._downloader is not None:
        self._login()
c5e8d7af 272
8bdd16b4 273 _DEFAULT_API_DATA = {
274 'context': {
275 'client': {
276 'clientName': 'WEB',
277 'clientVersion': '2.20201021.03.00',
278 }
279 },
280 }
8377574c 281
a0566bbf 282 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 283 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
284 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 285
def _call_api(self, ep, query, video_id, fatal=True):
    """POST a JSON request to the youtubei/v1 endpoint *ep*.

    The request body is a shallow copy of _DEFAULT_API_DATA updated with
    *query*. Returns whatever self._download_json returns for the call;
    *fatal* is forwarded to it unchanged.
    """
    payload = self._DEFAULT_API_DATA.copy()
    payload.update(query)

    api_url = 'https://www.youtube.com/youtubei/v1/%s' % ep
    body = json.dumps(payload).encode('utf8')
    return self._download_json(
        api_url, video_id=video_id,
        note='Downloading API JSON', errnote='Unable to download API page',
        data=body, fatal=fatal,
        headers={'content-type': 'application/json'},
        query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
c54f4aad 296
def _extract_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData object in *webpage* and parse it as JSON.

    Two patterns are tried in order: the data regex followed by the
    boundary regex, then the data regex on its own.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_data = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_data, video_id)
0c148415 303
def _extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ytcfg.set({...}); call in *webpage*.

    When no such call is found, the string '{}' is parsed instead.
    Parsing is non-fatal.
    """
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False)
309
def _extract_video(self, renderer):
    """Build a url_transparent result dict from a video *renderer* dict.

    The video ID serves as both 'id' and 'url'. Title, description,
    duration, view count and uploader are looked up in *renderer* through
    try_get on the paths listed below.
    """
    def text_at(getter):
        # Every textual field is fetched the same way: try_get restricted
        # to compat_str
        return try_get(renderer, getter, compat_str)

    video_id = renderer.get('videoId')

    title = text_at((
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText']))
    description = text_at(
        lambda x: x['descriptionSnippet']['runs'][0]['text'])

    length_text = text_at(lambda x: x['lengthText']['simpleText'])
    duration = parse_duration(length_text)

    # Remove all whitespace, then take the leading run of digits and commas
    view_count_text = text_at(lambda x: x['viewCountText']['simpleText']) or ''
    compact_view_count = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_view_count, 'view count', default=None))

    uploader = text_at(lambda x: x['ownerText']['runs'][0]['text'])

    info = {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
    return info
339
0c148415 340
360e1ca5 341class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 342 IE_DESC = 'YouTube.com'
cb7dfeea 343 _VALID_URL = r"""(?x)^
c5e8d7af 344 (
edb53e2d 345 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 346 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 347 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 348 (?:www\.)?pwnyoutube\.com/|
8b561bfc 349 (?:www\.)?hooktube\.com/|
f7000f3a 350 (?:www\.)?yourepeat\.com/|
e69ae5b9 351 tube\.majestyc\.net/|
ba036333 352 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
c86d5023 353 (?:www\.)?invidious\.pussthecat\.org/|
354 (?:www\.)?invidious\.048596\.xyz/|
355 (?:www\.)?invidious\.zee\.li/|
356 (?:www\.)?vid\.puffyan\.us/|
357 (?:(?:www|au)\.)?ytprivate\.com/|
358 (?:www\.)?invidious\.namazso\.eu/|
359 (?:www\.)?invidious\.ethibox\.fr/|
360 (?:www\.)?inv\.skyn3t\.in/|
361 (?:www\.)?invidious\.himiko\.cloud/|
362 (?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/|
363 (?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/|
364 (?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/|
365 (?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/|
77d95677 366 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 367 (?:(?:www|no)\.)?invidiou\.sh/|
29f7c58a 368 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
8ae113ca 369 (?:www\.)?invidious\.kabi\.tk/|
ba036333 370 (?:www\.)?invidious\.13ad\.de/|
791d2e81 371 (?:www\.)?invidious\.mastodon\.host/|
29f7c58a 372 (?:www\.)?invidious\.zapashcanon\.fr/|
373 (?:www\.)?invidious\.kavin\.rocks/|
374 (?:www\.)?invidious\.tube/|
375 (?:www\.)?invidiou\.site/|
376 (?:www\.)?invidious\.site/|
377 (?:www\.)?invidious\.xyz/|
494d664e 378 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 379 (?:www\.)?invidious\.drycat\.fr/|
ba036333 380 (?:www\.)?tube\.poal\.co/|
29f7c58a 381 (?:www\.)?tube\.connect\.cafe/|
8ae113ca 382 (?:www\.)?vid\.wxzm\.sx/|
29f7c58a 383 (?:www\.)?vid\.mint\.lgbt/|
384bf91f 384 (?:www\.)?yewtu\.be/|
494d664e 385 (?:www\.)?yt\.elukerio\.org/|
894b3826 386 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 387 (?:www\.)?invidious\.ggc-project\.de/|
388 (?:www\.)?yt\.maisputain\.ovh/|
1db5ab6b 389 (?:www\.)?invidious\.toot\.koeln/|
390 (?:www\.)?invidious\.fdn\.fr/|
391 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 392 (?:www\.)?kgg2m7yk5aybusll\.onion/|
393 (?:www\.)?qklhadlycap4cnod\.onion/|
394 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
395 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
396 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
397 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 398 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 399 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
ac7553d0 403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 404 |(?: # or the v= param in all its forms
f7000f3a 405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 406 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
408 v=
409 )
f4b05232 410 ))
cbaed4bb
S
411 |(?:
412 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 415 )/
edb53e2d 416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 417 )
c5e8d7af 418 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 419 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
420 (?!.*?\blist=
421 (?:
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
424 )
425 )
c5e8d7af 426 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
e40c758c 428 _PLAYER_INFO_RE = (
cc2db878 429 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
430 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 431 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 432 )
2c62dc26 433 _formats = {
c2d3cb4c 434 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
435 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
436 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
437 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
438 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
439 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
440 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
441 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 442 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 443 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
444 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
445 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
446 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
447 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
448 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 449 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 450 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 452
453
454 # 3D videos
c2d3cb4c 455 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
456 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
457 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
458 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 459 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
460 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
461 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 462
96fb5605 463 # Apple HTTP Live Streaming
11f12195 464 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 465 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
466 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
467 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
468 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
469 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 470 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
471 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
472
473 # DASH mp4 video
d23028a8
S
474 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 479 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
480 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
484 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
485 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 486
f6f1fc92 487 # Dash mp4 audio
d23028a8
S
488 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
489 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
490 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
491 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
492 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
493 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
494 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
495
496 # Dash webm
d23028a8
S
497 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
504 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 512 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
513 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
515 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
519
520 # Dash webm audio
d23028a8
S
521 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
522 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 523
0857baad 524 # Dash webm audio with opus inside
d23028a8
S
525 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
526 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
527 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 528
ce6b9a2d
PH
529 # RTMP (unnamed)
530 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
531
532 # av01 video only formats sometimes served with "unknown" codecs
533 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 537 }
29f7c58a 538 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 539
fd5c4aab
S
540 _GEO_BYPASS = False
541
78caa52a 542 IE_NAME = 'youtube'
2eb88d95
PH
543 _TESTS = [
544 {
2d3d2997 545 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
546 'info_dict': {
547 'id': 'BaW_jenozKc',
548 'ext': 'mp4',
3867038a 549 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
550 'uploader': 'Philipp Hagemeister',
551 'uploader_id': 'phihag',
ec85ded8 552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
553 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
554 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 555 'upload_date': '20121002',
3867038a 556 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 557 'categories': ['Science & Technology'],
3867038a 558 'tags': ['youtube-dl'],
556dbe7f 559 'duration': 10,
dbdaaa23 560 'view_count': int,
3e7c1224
PH
561 'like_count': int,
562 'dislike_count': int,
7c80519c 563 'start_time': 1,
297a564b 564 'end_time': 9,
2eb88d95 565 }
0e853ca4 566 },
fccd3771 567 {
4bc3a23e
PH
568 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
569 'note': 'Embed-only video (#1746)',
570 'info_dict': {
571 'id': 'yZIXLfi8CZQ',
572 'ext': 'mp4',
573 'upload_date': '20120608',
574 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
575 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
576 'uploader': 'SET India',
94bfcd23 577 'uploader_id': 'setindia',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 579 'age_limit': 18,
545cc85d 580 },
581 'skip': 'Private video',
fccd3771 582 },
11b56058 583 {
8bdd16b4 584 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
585 'note': 'Use the first video ID in the URL',
586 'info_dict': {
587 'id': 'BaW_jenozKc',
588 'ext': 'mp4',
3867038a 589 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
590 'uploader': 'Philipp Hagemeister',
591 'uploader_id': 'phihag',
ec85ded8 592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 593 'upload_date': '20121002',
3867038a 594 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 595 'categories': ['Science & Technology'],
3867038a 596 'tags': ['youtube-dl'],
556dbe7f 597 'duration': 10,
dbdaaa23 598 'view_count': int,
11b56058
PM
599 'like_count': int,
600 'dislike_count': int,
34a7de29
S
601 },
602 'params': {
603 'skip_download': True,
604 },
11b56058 605 },
dd27fd17 606 {
2d3d2997 607 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
608 'note': '256k DASH audio (format 141) via DASH manifest',
609 'info_dict': {
610 'id': 'a9LDPn-MO4I',
611 'ext': 'm4a',
612 'upload_date': '20121002',
613 'uploader_id': '8KVIDEO',
ec85ded8 614 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
615 'description': '',
616 'uploader': '8KVIDEO',
617 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 618 },
4bc3a23e
PH
619 'params': {
620 'youtube_include_dash_manifest': True,
621 'format': '141',
4919603f 622 },
de3c7fe0 623 'skip': 'format 141 not served anymore',
dd27fd17 624 },
8bdd16b4 625 # DASH manifest with encrypted signature
626 {
627 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
628 'info_dict': {
629 'id': 'IB3lcPjvWLA',
630 'ext': 'm4a',
631 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
632 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
633 'duration': 244,
634 'uploader': 'AfrojackVEVO',
635 'uploader_id': 'AfrojackVEVO',
636 'upload_date': '20131011',
cc2db878 637 'abr': 129.495,
8bdd16b4 638 },
639 'params': {
640 'youtube_include_dash_manifest': True,
641 'format': '141/bestaudio[ext=m4a]',
642 },
643 },
aa79ac0c
PH
644 # Controversy video
645 {
646 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
647 'info_dict': {
648 'id': 'T4XJQO3qol8',
649 'ext': 'mp4',
556dbe7f 650 'duration': 219,
aa79ac0c 651 'upload_date': '20100909',
4fe54c12 652 'uploader': 'Amazing Atheist',
aa79ac0c 653 'uploader_id': 'TheAmazingAtheist',
ec85ded8 654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 655 'title': 'Burning Everyone\'s Koran',
545cc85d 656 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 657 }
c522adb1 658 },
dd2d55f1 659 # Normal age-gate video (embed allowed)
c522adb1 660 {
2d3d2997 661 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
662 'info_dict': {
663 'id': 'HtVdAasjOgU',
664 'ext': 'mp4',
665 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 666 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 667 'duration': 142,
c522adb1
JMF
668 'uploader': 'The Witcher',
669 'uploader_id': 'WitcherGame',
ec85ded8 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 671 'upload_date': '20140605',
34952f09 672 'age_limit': 18,
c522adb1
JMF
673 },
674 },
8bdd16b4 675 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
676 # YouTube Red ad is not captured for creator
677 {
678 'url': '__2ABJjxzNo',
679 'info_dict': {
680 'id': '__2ABJjxzNo',
681 'ext': 'mp4',
682 'duration': 266,
683 'upload_date': '20100430',
684 'uploader_id': 'deadmau5',
685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 686 'creator': 'deadmau5',
687 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 688 'uploader': 'deadmau5',
689 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 690 'alt_title': 'Some Chords',
8bdd16b4 691 },
692 'expected_warnings': [
693 'DASH manifest missing',
694 ]
695 },
067aa17e 696 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
697 {
698 'url': 'lqQg6PlCWgI',
699 'info_dict': {
700 'id': 'lqQg6PlCWgI',
701 'ext': 'mp4',
556dbe7f 702 'duration': 6085,
90227264 703 'upload_date': '20150827',
cbe2bd91 704 'uploader_id': 'olympic',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 706 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 707 'uploader': 'Olympic',
cbe2bd91
PH
708 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
709 },
710 'params': {
711 'skip_download': 'requires avconv',
e52a40ab 712 }
cbe2bd91 713 },
6271f1ca
PH
714 # Non-square pixels
715 {
716 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
717 'info_dict': {
718 'id': '_b-2C3KPAM0',
719 'ext': 'mp4',
720 'stretched_ratio': 16 / 9.,
556dbe7f 721 'duration': 85,
6271f1ca
PH
722 'upload_date': '20110310',
723 'uploader_id': 'AllenMeow',
ec85ded8 724 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 725 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 726 'uploader': '孫ᄋᄅ',
6271f1ca
PH
727 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
728 },
06b491eb
S
729 },
730 # url_encoded_fmt_stream_map is empty string
731 {
732 'url': 'qEJwOuvDf7I',
733 'info_dict': {
734 'id': 'qEJwOuvDf7I',
f57b7835 735 'ext': 'webm',
06b491eb
S
736 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
737 'description': '',
738 'upload_date': '20150404',
739 'uploader_id': 'spbelect',
740 'uploader': 'Наблюдатели Петербурга',
741 },
742 'params': {
743 'skip_download': 'requires avconv',
e323cf3f
S
744 },
745 'skip': 'This live event has ended.',
06b491eb 746 },
067aa17e 747 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
748 {
749 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
750 'info_dict': {
751 'id': 'FIl7x6_3R5Y',
eb6793ba 752 'ext': 'webm',
da77d856
S
753 'title': 'md5:7b81415841e02ecd4313668cde88737a',
754 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 755 'duration': 220,
da77d856
S
756 'upload_date': '20150625',
757 'uploader_id': 'dorappi2000',
ec85ded8 758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 759 'uploader': 'dorappi2000',
eb6793ba 760 'formats': 'mincount:31',
da77d856 761 },
eb6793ba 762 'skip': 'not actual anymore',
2ee8f5d8 763 },
8a1a26ce
YCH
764 # DASH manifest with segment_list
765 {
766 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
767 'md5': '8ce563a1d667b599d21064e982ab9e31',
768 'info_dict': {
769 'id': 'CsmdDsKjzN8',
770 'ext': 'mp4',
17ee98e1 771 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
772 'uploader': 'Airtek',
773 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
774 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
775 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
776 },
777 'params': {
778 'youtube_include_dash_manifest': True,
779 'format': '135', # bestvideo
be49068d
S
780 },
781 'skip': 'This live event has ended.',
2ee8f5d8 782 },
cf7e015f
S
783 {
784 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 785 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 786 'info_dict': {
545cc85d 787 'id': 'jvGDaLqkpTg',
788 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
789 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
790 },
791 'playlist': [{
792 'info_dict': {
545cc85d 793 'id': 'jvGDaLqkpTg',
cf7e015f 794 'ext': 'mp4',
545cc85d 795 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
796 'description': 'md5:e03b909557865076822aa169218d6a5d',
797 'duration': 10643,
798 'upload_date': '20161111',
799 'uploader': 'Team PGP',
800 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
801 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
802 },
803 }, {
804 'info_dict': {
545cc85d 805 'id': '3AKt1R1aDnw',
cf7e015f 806 'ext': 'mp4',
545cc85d 807 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
808 'description': 'md5:e03b909557865076822aa169218d6a5d',
809 'duration': 10991,
810 'upload_date': '20161111',
811 'uploader': 'Team PGP',
812 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
545cc85d 817 'id': 'RtAMM00gpVc',
cf7e015f 818 'ext': 'mp4',
545cc85d 819 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
820 'description': 'md5:e03b909557865076822aa169218d6a5d',
821 'duration': 10995,
822 'upload_date': '20161111',
823 'uploader': 'Team PGP',
824 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
826 },
827 }, {
828 'info_dict': {
545cc85d 829 'id': '6N2fdlP3C5U',
cf7e015f 830 'ext': 'mp4',
545cc85d 831 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
832 'description': 'md5:e03b909557865076822aa169218d6a5d',
833 'duration': 10990,
834 'upload_date': '20161111',
835 'uploader': 'Team PGP',
836 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
838 },
839 }],
840 'params': {
841 'skip_download': True,
842 },
cbaed4bb 843 },
f9f49d87 844 {
067aa17e 845 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
846 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
847 'info_dict': {
848 'id': 'gVfLd0zydlo',
849 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
850 },
851 'playlist_count': 2,
be49068d 852 'skip': 'Not multifeed anymore',
f9f49d87 853 },
cbaed4bb 854 {
2d3d2997 855 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 856 'only_matching': True,
0e49d9a6 857 },
6d4fc66b 858 {
2d3d2997 859 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
860 'only_matching': True,
861 },
0e49d9a6 862 {
067aa17e 863 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 864 # Also tests cut-off URL expansion in video description (see
067aa17e
S
865 # https://github.com/ytdl-org/youtube-dl/issues/1892,
866 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
867 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
868 'info_dict': {
869 'id': 'lsguqyKfVQg',
870 'ext': 'mp4',
871 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 872 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 873 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 874 'duration': 133,
0e49d9a6
LL
875 'upload_date': '20151119',
876 'uploader_id': 'IronSoulElf',
ec85ded8 877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 878 'uploader': 'IronSoulElf',
eb6793ba
S
879 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
880 'track': 'Dark Walk - Position Music',
881 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 882 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
883 },
884 'params': {
885 'skip_download': True,
886 },
887 },
61f92af1 888 {
067aa17e 889 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
890 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
891 'only_matching': True,
892 },
313dfc45
LL
893 {
894 # Video with yt:stretch=17:0
895 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
896 'info_dict': {
897 'id': 'Q39EVAstoRM',
898 'ext': 'mp4',
899 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
900 'description': 'md5:ee18a25c350637c8faff806845bddee9',
901 'upload_date': '20151107',
902 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
903 'uploader': 'CH GAMER DROID',
904 },
905 'params': {
906 'skip_download': True,
907 },
be49068d 908 'skip': 'This video does not exist.',
313dfc45 909 },
7caf9830
S
910 {
911 # Video licensed under Creative Commons
912 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
913 'info_dict': {
914 'id': 'M4gD1WSo5mA',
915 'ext': 'mp4',
916 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
917 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 918 'duration': 721,
7caf9830
S
919 'upload_date': '20150127',
920 'uploader_id': 'BerkmanCenter',
ec85ded8 921 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 922 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
923 'license': 'Creative Commons Attribution license (reuse allowed)',
924 },
925 'params': {
926 'skip_download': True,
927 },
928 },
fd050249
S
929 {
930 # Channel-like uploader_url
931 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
932 'info_dict': {
933 'id': 'eQcmzGIKrzg',
934 'ext': 'mp4',
935 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 936 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 937 'duration': 4060,
fd050249 938 'upload_date': '20151119',
eb6793ba 939 'uploader': 'Bernie Sanders',
fd050249 940 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 941 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
942 'license': 'Creative Commons Attribution license (reuse allowed)',
943 },
944 'params': {
945 'skip_download': True,
946 },
947 },
040ac686
S
948 {
949 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
950 'only_matching': True,
7f29cf54
S
951 },
952 {
067aa17e 953 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
954 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
955 'only_matching': True,
6496ccb4
S
956 },
957 {
958 # Rental video preview
959 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
960 'info_dict': {
961 'id': 'uGpuVWrhIzE',
962 'ext': 'mp4',
963 'title': 'Piku - Trailer',
964 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
965 'upload_date': '20150811',
966 'uploader': 'FlixMatrix',
967 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 968 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
969 'license': 'Standard YouTube License',
970 },
971 'params': {
972 'skip_download': True,
973 },
eb6793ba 974 'skip': 'This video is not available.',
022a5d66 975 },
12afdc2a
S
976 {
977 # YouTube Red video with episode data
978 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
979 'info_dict': {
980 'id': 'iqKdEhx-dD4',
981 'ext': 'mp4',
982 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 983 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 984 'duration': 2085,
12afdc2a
S
985 'upload_date': '20170118',
986 'uploader': 'Vsauce',
987 'uploader_id': 'Vsauce',
988 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
989 'series': 'Mind Field',
990 'season_number': 1,
991 'episode_number': 1,
992 },
993 'params': {
994 'skip_download': True,
995 },
996 'expected_warnings': [
997 'Skipping DASH manifest',
998 ],
999 },
c7121fa7
S
1000 {
1001 # The following content has been identified by the YouTube community
1002 # as inappropriate or offensive to some audiences.
1003 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1004 'info_dict': {
1005 'id': '6SJNVb0GnPI',
1006 'ext': 'mp4',
1007 'title': 'Race Differences in Intelligence',
1008 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1009 'duration': 965,
1010 'upload_date': '20140124',
1011 'uploader': 'New Century Foundation',
1012 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1013 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1014 },
1015 'params': {
1016 'skip_download': True,
1017 },
545cc85d 1018 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1019 },
022a5d66
S
1020 {
1021 # itag 212
1022 'url': '1t24XAntNCY',
1023 'only_matching': True,
fd5c4aab
S
1024 },
1025 {
1026 # geo restricted to JP
1027 'url': 'sJL6WA-aGkQ',
1028 'only_matching': True,
1029 },
cd5a74a2
S
1030 {
1031 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1032 'only_matching': True,
1033 },
825cd268
RA
1034 {
1035 # DRM protected
1036 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1037 'only_matching': True,
4fe54c12
S
1038 },
1039 {
1040 # Video with unsupported adaptive stream type formats
1041 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1042 'info_dict': {
1043 'id': 'Z4Vy8R84T1U',
1044 'ext': 'mp4',
1045 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1046 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1047 'duration': 433,
1048 'upload_date': '20130923',
1049 'uploader': 'Amelia Putri Harwita',
1050 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1051 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1052 'formats': 'maxcount:10',
1053 },
1054 'params': {
1055 'skip_download': True,
1056 'youtube_include_dash_manifest': False,
1057 },
5429d6a9 1058 'skip': 'not actual anymore',
5caabd3c 1059 },
1060 {
822b9d9c 1061 # Youtube Music Auto-generated description
5caabd3c 1062 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1063 'info_dict': {
1064 'id': 'MgNrAu2pzNs',
1065 'ext': 'mp4',
1066 'title': 'Voyeur Girl',
1067 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1068 'upload_date': '20190312',
5429d6a9
S
1069 'uploader': 'Stephen - Topic',
1070 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1071 'artist': 'Stephen',
1072 'track': 'Voyeur Girl',
1073 'album': 'it\'s too much love to know my dear',
1074 'release_date': '20190313',
1075 'release_year': 2019,
1076 },
1077 'params': {
1078 'skip_download': True,
1079 },
1080 },
66b48727
RA
1081 {
1082 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1083 'only_matching': True,
1084 },
011e75e6
S
1085 {
1086 # invalid -> valid video id redirection
1087 'url': 'DJztXj2GPfl',
1088 'info_dict': {
1089 'id': 'DJztXj2GPfk',
1090 'ext': 'mp4',
1091 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1092 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1093 'upload_date': '20090125',
1094 'uploader': 'Prochorowka',
1095 'uploader_id': 'Prochorowka',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1097 'artist': 'Panjabi MC',
1098 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1099 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
545cc85d 1104 'skip': 'Video unavailable',
ea74e00b
DP
1105 },
1106 {
1107 # empty description results in an empty string
1108 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1109 'info_dict': {
1110 'id': 'x41yOUIvK2k',
1111 'ext': 'mp4',
1112 'title': 'IMG 3456',
1113 'description': '',
1114 'upload_date': '20170613',
1115 'uploader_id': 'ElevageOrVert',
1116 'uploader': 'ElevageOrVert',
1117 },
1118 'params': {
1119 'skip_download': True,
1120 },
1121 },
a0566bbf 1122 {
29f7c58a 1123 # with '};' inside yt initial data (see [1])
1124 # see [2] for an example with '};' inside ytInitialPlayerResponse
1125 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1126 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1127 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1128 'info_dict': {
1129 'id': 'CHqg6qOn4no',
1130 'ext': 'mp4',
1131 'title': 'Part 77 Sort a list of simple types in c#',
1132 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1133 'upload_date': '20130831',
1134 'uploader_id': 'kudvenkat',
1135 'uploader': 'kudvenkat',
1136 },
1137 'params': {
1138 'skip_download': True,
1139 },
1140 },
29f7c58a 1141 {
1142 # another example of '};' in ytInitialData
1143 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1144 'only_matching': True,
1145 },
1146 {
1147 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1148 'only_matching': True,
1149 },
545cc85d 1150 {
cc2db878 1151 # https://github.com/ytdl-org/youtube-dl/pull/28094
1152 'url': 'OtqTfy26tG0',
1153 'info_dict': {
1154 'id': 'OtqTfy26tG0',
1155 'ext': 'mp4',
1156 'title': 'Burn Out',
1157 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1158 'upload_date': '20141120',
1159 'uploader': 'The Cinematic Orchestra - Topic',
1160 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1161 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1162 'artist': 'The Cinematic Orchestra',
1163 'track': 'Burn Out',
1164 'album': 'Every Day',
1165 'release_data': None,
1166 'release_year': None,
1167 },
1168 'params': {
1169 'skip_download': True,
1170 },
545cc85d 1171 },
2eb88d95
PH
1172 ]
1173
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player_url, signature cache id)
    self._player_cache = {}
    # Downloaded player source, keyed by player id
    self._code_cache = {}
e0df6211 1178
60064c53
PH
1179 def _signature_cache_id(self, example_sig):
1180 """ Return a string representation of a signature """
78caa52a 1181 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1182
e40c758c
S
1183 @classmethod
1184 def _extract_player_info(cls, player_url):
1185 for player_re in cls._PLAYER_INFO_RE:
1186 id_m = re.search(player_re, player_url)
1187 if id_m:
1188 break
1189 else:
c081b35c 1190 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1191 return id_m.group('id')
e40c758c
S
1192
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    The result is first looked up in the downloader cache ('youtube-sigfuncs')
    under a key built from the player id and the layout of example_sig.
    On a miss the player code is downloaded (once per player id), the
    signature function is parsed out of it, and the index permutation it
    applies to a string of len(example_sig) characters is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_layout = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_layout)
    # The id is passed to the cache as a key; it must not contain a path
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        def apply_cached(s):
            return ''.join(s[i] for i in cached_indices)
        return apply_cached

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    res = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose n-th character has code point n, so the output
    # characters reveal which input index ended up at each position
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return res
1219
60064c53 1220 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1221 def gen_sig_code(idxs):
1222 def _genslice(start, end, step):
78caa52a 1223 starts = '' if start == 0 else str(start)
8bcc8756 1224 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1225 steps = '' if step == 1 else (':%d' % step)
78caa52a 1226 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1227
1228 step = None
7af808a5
PH
1229 # Quelch pyflakes warnings - start will be set when step is set
1230 start = '(Never used)'
edf3e38e
PH
1231 for i, prev in zip(idxs[1:], idxs[:-1]):
1232 if step is not None:
1233 if i - prev == step:
1234 continue
1235 yield _genslice(start, prev, step)
1236 step = None
1237 continue
1238 if i - prev in [-1, 1]:
1239 step = i - prev
1240 start = prev
1241 continue
1242 else:
78caa52a 1243 yield 's[%d]' % prev
edf3e38e 1244 if step is None:
78caa52a 1245 yield 's[%d]' % i
edf3e38e
PH
1246 else:
1247 yield _genslice(start, i, step)
1248
78caa52a 1249 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1250 cache_res = func(test_string)
edf3e38e 1251 cache_spec = [ord(c) for c in cache_res]
78caa52a 1252 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1253 signature_id_tuple = '(%s)' % (
1254 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1255 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1256 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1257 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1258
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in jscode.

    The function name is located with the first matching pattern below, the
    function is extracted with JSInterpreter, and the returned callable
    passes its single argument to it as a one-element argument list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        return initial_function([s])

    return run_signature
1282
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    player_url is made absolute first (protocol-relative and site-relative
    forms are accepted). The signature function is cached per
    (player_url, signature layout) on this instance.

    Raises ExtractorError if player_url is None, or wrapping any exception
    raised while obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Include the full traceback in the message
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1309
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL from player_response, if present.

    The URL is taken from playbackTracking.videostatsPlaybackUrl.baseUrl;
    'ver' and a generated 'cpn' are set in its query string before the
    request is made. A failed request is not fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1334
66c9fa36
S
1335 @staticmethod
1336 def _extract_urls(webpage):
1337 # Embedded YouTube player
1338 entries = [
1339 unescapeHTML(mobj.group('url'))
1340 for mobj in re.finditer(r'''(?x)
1341 (?:
1342 <iframe[^>]+?src=|
1343 data-video-url=|
1344 <embed[^>]+?src=|
1345 embedSWF\(?:\s*|
1346 <object[^>]+data=|
1347 new\s+SWFObject\(
1348 )
1349 (["\'])
1350 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1351 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1352 \1''', webpage)]
1353
1354 # lazyYT YouTube embed
1355 entries.extend(list(map(
1356 unescapeHTML,
1357 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1358
1359 # Wordpress "YouTube Video Importer" plugin
1360 matches = re.findall(r'''(?x)<div[^>]+
1361 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1362 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1363 entries.extend(m[-1] for m in matches)
1364
1365 return entries
1366
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1371
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of cls._VALID_URL matched against url.

    The pattern is applied in verbose mode.

    Raises ExtractorError if url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1379
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the player overlay data.

    Each returned dict has 'start_time', 'end_time' and 'title'. A chapter
    ends where the next one starts; the last chapter ends at duration.
    Chapters whose start or end time cannot be determined are skipped.
    Returns None when data holds no chapter list. video_id is not used.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_millis, scale=1000)

    chapter_count = len(chapters_list)
    chapters = []
    for idx, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if idx + 1 < chapter_count:
            end_time = chapter_time(chapters_list[idx + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1419
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for a JSON blob and return it parsed.

    regex followed by self._YT_INITIAL_BOUNDARY_RE is tried first, then
    regex alone; '{}' is parsed when neither matches. Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1424
c5e8d7af 1425 def _real_extract(self, url):
cf7e015f 1426 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1427 video_id = self._match_id(url)
1428 base_url = self.http_scheme() + '//www.youtube.com/'
a718ef84 1429 webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1'
545cc85d 1430 webpage = self._download_webpage(webpage_url, video_id, fatal=False)
1431
1432 player_response = None
1433 if webpage:
1434 player_response = self._extract_yt_initial_variable(
1435 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1436 video_id, 'initial player response')
1437 if not player_response:
1438 player_response = self._call_api(
1439 'player', {'videoId': video_id}, video_id)
1440
1441 playability_status = player_response.get('playabilityStatus') or {}
1442 if playability_status.get('reason') == 'Sign in to confirm your age':
1443 pr = self._parse_json(try_get(compat_parse_qs(
1444 self._download_webpage(
1445 base_url + 'get_video_info', video_id,
1446 'Refetching age-gated info webpage',
1447 'unable to download video info webpage', query={
1448 'video_id': video_id,
1449 'eurl': 'https://www.youtube.com/embed/' + video_id,
1450 }, fatal=False)),
1451 lambda x: x['player_response'][0],
1452 compat_str) or '{}', video_id)
1453 if pr:
1454 player_response = pr
1455
1456 trailer_video_id = try_get(
1457 playability_status,
1458 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1459 compat_str)
1460 if trailer_video_id:
1461 return self.url_result(
1462 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1463
545cc85d 1464 def get_text(x):
1465 if not x:
c2d125d9 1466 return
545cc85d 1467 return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
15be3eb5 1468
545cc85d 1469 search_meta = (
1470 lambda x: self._html_search_meta(x, webpage, default=None)) \
1471 if webpage else lambda x: None
dbdaaa23 1472
545cc85d 1473 video_details = player_response.get('videoDetails') or {}
37357d21 1474 microformat = try_get(
545cc85d 1475 player_response,
1476 lambda x: x['microformat']['playerMicroformatRenderer'],
1477 dict) or {}
1478 video_title = video_details.get('title') \
1479 or get_text(microformat.get('title')) \
1480 or search_meta(['og:title', 'twitter:title', 'title'])
1481 video_description = video_details.get('shortDescription')
cf7e015f 1482
8fe10494 1483 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1484 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1485 multifeed_metadata_list = try_get(
1486 player_response,
1487 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1488 compat_str)
8fe10494
S
1489 if multifeed_metadata_list:
1490 entries = []
1491 feed_ids = []
1492 for feed in multifeed_metadata_list.split(','):
1493 # Unquote should take place before split on comma (,) since textual
1494 # fields may contain comma as well (see
067aa17e 1495 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1496 feed_data = compat_parse_qs(
1497 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1498
1499 def feed_entry(name):
545cc85d 1500 return try_get(
1501 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1502
1503 feed_id = feed_entry('id')
1504 if not feed_id:
1505 continue
1506 feed_title = feed_entry('title')
1507 title = video_title
1508 if feed_title:
1509 title += ' (%s)' % feed_title
8fe10494
S
1510 entries.append({
1511 '_type': 'url_transparent',
1512 'ie_key': 'Youtube',
1513 'url': smuggle_url(
545cc85d 1514 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1515 {'force_singlefeed': True}),
6b09401b 1516 'title': title,
8fe10494 1517 })
6b09401b 1518 feed_ids.append(feed_id)
8fe10494
S
1519 self.to_screen(
1520 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1521 % (', '.join(feed_ids), video_id))
545cc85d 1522 return self.playlist_result(
1523 entries, video_id, video_title, video_description)
8fe10494
S
1524 else:
1525 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1526
545cc85d 1527 formats = []
1528 itags = []
cc2db878 1529 itag_qualities = {}
545cc85d 1530 player_url = None
cc2db878 1531 # TODO: Enable this after fixing formatSort
8a784c74 1532 # q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1533 streaming_data = player_response.get('streamingData') or {}
1534 streaming_formats = streaming_data.get('formats') or []
1535 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1536 for fmt in streaming_formats:
1537 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1538 continue
321bf820 1539
cc2db878 1540 itag = str_or_none(fmt.get('itag'))
1541 quality = fmt.get('quality')
1542 if itag and quality:
1543 itag_qualities[itag] = quality
1544 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1545 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1546 # number of fragments that would subsequently be requested (with `&sq=N`)
1547 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1548 continue
1549
545cc85d 1550 fmt_url = fmt.get('url')
1551 if not fmt_url:
1552 sc = compat_parse_qs(fmt.get('signatureCipher'))
1553 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1554 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1555 if not (sc and fmt_url and encrypted_sig):
1556 continue
1557 if not player_url:
1558 if not webpage:
1559 continue
1560 player_url = self._search_regex(
1561 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1562 webpage, 'player URL', fatal=False)
1563 if not player_url:
201e9eaa 1564 continue
545cc85d 1565 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1566 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1567 fmt_url += '&' + sp + '=' + signature
1568
545cc85d 1569 if itag:
1570 itags.append(itag)
cc2db878 1571 tbr = float_or_none(
1572 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1573 dct = {
1574 'asr': int_or_none(fmt.get('audioSampleRate')),
1575 'filesize': int_or_none(fmt.get('contentLength')),
1576 'format_id': itag,
1577 'format_note': fmt.get('qualityLabel') or quality,
1578 'fps': int_or_none(fmt.get('fps')),
1579 'height': int_or_none(fmt.get('height')),
cc2db878 1580 # 'quality': q(quality), # TODO: Enable this after fixing formatSort
1581 'tbr': tbr,
545cc85d 1582 'url': fmt_url,
1583 'width': fmt.get('width'),
1584 }
1585 mimetype = fmt.get('mimeType')
1586 if mimetype:
1587 mobj = re.match(
1588 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1589 if mobj:
1590 dct['ext'] = mimetype2ext(mobj.group(1))
1591 dct.update(parse_codecs(mobj.group(2)))
cc2db878 1592 no_audio = dct.get('acodec') == 'none'
1593 no_video = dct.get('vcodec') == 'none'
1594 if no_audio:
1595 dct['vbr'] = tbr
1596 if no_video:
1597 dct['abr'] = tbr
1598 if no_audio or no_video:
545cc85d 1599 dct['downloader_options'] = {
1600 # Youtube throttles chunks >~10M
1601 'http_chunk_size': 10485760,
bf1317d2 1602 }
545cc85d 1603 formats.append(dct)
1604
1605 hls_manifest_url = streaming_data.get('hlsManifestUrl')
1606 if hls_manifest_url:
1607 for f in self._extract_m3u8_formats(
1608 hls_manifest_url, video_id, 'mp4', fatal=False):
1609 itag = self._search_regex(
1610 r'/itag/(\d+)', f['url'], 'itag', default=None)
1611 if itag:
1612 f['format_id'] = itag
1613 formats.append(f)
1614
1615 if self._downloader.params.get('youtube_include_dash_manifest'):
1616 dash_manifest_url = streaming_data.get('dashManifestUrl')
1617 if dash_manifest_url:
545cc85d 1618 for f in self._extract_mpd_formats(
1619 dash_manifest_url, video_id, fatal=False):
cc2db878 1620 itag = f['format_id']
1621 if itag in itags:
1622 continue
1623 # if itag in itag_qualities: # TODO: Enable this after fixing formatSort
1624 # f['quality'] = q(itag_qualities[itag])
545cc85d 1625 filesize = int_or_none(self._search_regex(
1626 r'/clen/(\d+)', f.get('fragment_base_url')
1627 or f['url'], 'file size', default=None))
1628 if filesize:
1629 f['filesize'] = filesize
cc2db878 1630 formats.append(f)
bf1317d2 1631
545cc85d 1632 if not formats:
63ad4d43 1633 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 1634 raise ExtractorError(
1635 'This video is DRM protected.', expected=True)
1636 pemr = try_get(
1637 playability_status,
1638 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
1639 dict) or {}
1640 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
1641 subreason = pemr.get('subreason')
1642 if subreason:
1643 subreason = clean_html(get_text(subreason))
1644 if subreason == 'The uploader has not made this video available in your country.':
1645 countries = microformat.get('availableCountries')
1646 if not countries:
1647 regions_allowed = search_meta('regionsAllowed')
1648 countries = regions_allowed.split(',') if regions_allowed else None
1649 self.raise_geo_restricted(
1650 subreason, countries)
1651 reason += '\n' + subreason
1652 if reason:
1653 raise ExtractorError(reason, expected=True)
bf1317d2 1654
545cc85d 1655 self._sort_formats(formats)
bf1317d2 1656
545cc85d 1657 keywords = video_details.get('keywords') or []
1658 if not keywords and webpage:
1659 keywords = [
1660 unescapeHTML(m.group('content'))
1661 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
1662 for keyword in keywords:
1663 if keyword.startswith('yt:stretch='):
1664 w, h = keyword.split('=')[1].split(':')
1665 w, h = int(w), int(h)
1666 if w > 0 and h > 0:
1667 ratio = w / h
1668 for f in formats:
1669 if f.get('vcodec') != 'none':
1670 f['stretched_ratio'] = ratio
6449cd80 1671
545cc85d 1672 thumbnails = []
1673 for container in (video_details, microformat):
1674 for thumbnail in (try_get(
1675 container,
1676 lambda x: x['thumbnail']['thumbnails'], list) or []):
1677 thumbnail_url = thumbnail.get('url')
1678 if not thumbnail_url:
bf1317d2 1679 continue
545cc85d 1680 thumbnails.append({
1681 'height': int_or_none(thumbnail.get('height')),
1682 'url': thumbnail_url,
1683 'width': int_or_none(thumbnail.get('width')),
1684 })
1685 if thumbnails:
1686 break
a6211d23 1687 else:
545cc85d 1688 thumbnail = search_meta(['og:image', 'twitter:image'])
1689 if thumbnail:
1690 thumbnails = [{'url': thumbnail}]
1691
1692 category = microformat.get('category') or search_meta('genre')
1693 channel_id = video_details.get('channelId') \
1694 or microformat.get('externalChannelId') \
1695 or search_meta('channelId')
1696 duration = int_or_none(
1697 video_details.get('lengthSeconds')
1698 or microformat.get('lengthSeconds')) \
1699 or parse_duration(search_meta('duration'))
1700 is_live = video_details.get('isLive')
1701 owner_profile_url = microformat.get('ownerProfileUrl')
1702
1703 info = {
1704 'id': video_id,
1705 'title': self._live_title(video_title) if is_live else video_title,
1706 'formats': formats,
1707 'thumbnails': thumbnails,
1708 'description': video_description,
1709 'upload_date': unified_strdate(
1710 microformat.get('uploadDate')
1711 or search_meta('uploadDate')),
1712 'uploader': video_details['author'],
1713 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
1714 'uploader_url': owner_profile_url,
1715 'channel_id': channel_id,
1716 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
1717 'duration': duration,
1718 'view_count': int_or_none(
1719 video_details.get('viewCount')
1720 or microformat.get('viewCount')
1721 or search_meta('interactionCount')),
1722 'average_rating': float_or_none(video_details.get('averageRating')),
1723 'age_limit': 18 if (
1724 microformat.get('isFamilySafe') is False
1725 or search_meta('isFamilyFriendly') == 'false'
1726 or search_meta('og:restrictions:age') == '18+') else 0,
1727 'webpage_url': webpage_url,
1728 'categories': [category] if category else None,
1729 'tags': keywords,
1730 'is_live': is_live,
1731 'playable_in_embed': playability_status.get('playableInEmbed'),
1732 }
b477fc13 1733
545cc85d 1734 pctr = try_get(
1735 player_response,
1736 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
1737 subtitles = {}
1738 if pctr:
def process_language(container, base_url, lang_code, query):
    """Store one subtitle entry per supported format in container[lang_code].

    `query` is updated in place with the 'fmt' currently being built
    before it is merged into `base_url`.
    """
    tracks = []
    for sub_format in self._SUBTITLE_FORMATS:
        query['fmt'] = sub_format
        tracks.append({
            'ext': sub_format,
            'url': update_url_query(base_url, query),
        })
    container[lang_code] = tracks
7e72694b 1750
545cc85d 1751 for caption_track in (pctr.get('captionTracks') or []):
1752 base_url = caption_track.get('baseUrl')
1753 if not base_url:
1754 continue
1755 if caption_track.get('kind') != 'asr':
1756 lang_code = caption_track.get('languageCode')
1757 if not lang_code:
1758 continue
1759 process_language(
1760 subtitles, base_url, lang_code, {})
1761 continue
1762 automatic_captions = {}
1763 for translation_language in (pctr.get('translationLanguages') or []):
1764 translation_language_code = translation_language.get('languageCode')
1765 if not translation_language_code:
1766 continue
1767 process_language(
1768 automatic_captions, base_url, translation_language_code,
1769 {'tlang': translation_language_code})
1770 info['automatic_captions'] = automatic_captions
1771 info['subtitles'] = subtitles
7e72694b 1772
545cc85d 1773 parsed_url = compat_urllib_parse_urlparse(url)
1774 for component in [parsed_url.fragment, parsed_url.query]:
1775 query = compat_parse_qs(component)
1776 for k, v in query.items():
1777 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
1778 d_k += '_time'
1779 if d_k not in info and k in s_ks:
1780 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
1781
# Youtube Music Auto-generated description
# The description is expected to look like
#   "<track> · <artist>[ · <artist>...]\n\n<album>\n ... Auto-generated by YouTube."
# with optional "℗ <year>", "Released on: YYYY-MM-DD" and "Artist: ..." parts.
if video_description:
    mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
    if mobj:
        release_year = mobj.group('release_year')
        release_date = mobj.group('release_date')
        if release_date:
            # Normalise YYYY-MM-DD to YYYYMMDD
            release_date = release_date.replace('-', '')
            if not release_year:
                # No "℗ <year>" present: fall back to the year of the release date
                release_year = release_date[:4]
        info.update({
            # strip() must be applied to the captured text, not to the
            # group name ('album'.strip() is just 'album')
            'album': mobj.group('album').strip(),
            'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
            'track': mobj.group('track').strip(),
            'release_date': release_date,
            'release_year': int_or_none(release_year),
        })
7e72694b 1799
545cc85d 1800 initial_data = None
1801 if webpage:
1802 initial_data = self._extract_yt_initial_variable(
1803 webpage, self._YT_INITIAL_DATA_RE, video_id,
1804 'yt initial data')
1805 if not initial_data:
1806 initial_data = self._call_api(
1807 'next', {'videoId': video_id}, video_id, fatal=False)
1808
1809 if not is_live:
1810 try:
1811 # This will error if there is no livechat
1812 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1813 info['subtitles']['live_chat'] = [{
1814 'video_id': video_id,
1815 'ext': 'json',
1816 'protocol': 'youtube_live_chat_replay',
1817 }]
1818 except (KeyError, IndexError, TypeError):
1819 pass
1820
1821 if initial_data:
1822 chapters = self._extract_chapters_from_json(
1823 initial_data, video_id, duration)
1824 if not chapters:
1825 for engagment_pannel in (initial_data.get('engagementPanels') or []):
1826 contents = try_get(
1827 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
1828 list)
1829 if not contents:
1830 continue
1831
def chapter_time(mmlir):
    """Parse the time of a chapter from its macroMarkersListItemRenderer.

    `mmlir` may be empty/None: the end-time lookup of the caller goes
    through try_get and may not find a renderer for the next item. That
    case is handled like a renderer without a 'timeDescription' instead
    of failing on `None.get`.
    """
    time_description = (mmlir or {}).get('timeDescription')
    return parse_duration(get_text(time_description))
1835
1836 chapters = []
1837 for next_num, content in enumerate(contents, start=1):
1838 mmlir = content.get('macroMarkersListItemRenderer') or {}
1839 start_time = chapter_time(mmlir)
1840 end_time = chapter_time(try_get(
1841 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
1842 if next_num < len(contents) else duration
1843 if start_time is None or end_time is None:
1844 continue
1845 chapters.append({
1846 'start_time': start_time,
1847 'end_time': end_time,
1848 'title': get_text(mmlir.get('title')),
1849 })
1850 if chapters:
1851 break
1852 if chapters:
1853 info['chapters'] = chapters
1854
1855 contents = try_get(
1856 initial_data,
1857 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
1858 list) or []
1859 for content in contents:
1860 vpir = content.get('videoPrimaryInfoRenderer')
1861 if vpir:
1862 stl = vpir.get('superTitleLink')
1863 if stl:
1864 stl = get_text(stl)
1865 if try_get(
1866 vpir,
1867 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
1868 info['location'] = stl
1869 else:
1870 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
1871 if mobj:
1872 info.update({
1873 'series': mobj.group(1),
1874 'season_number': int(mobj.group(2)),
1875 'episode_number': int(mobj.group(3)),
1876 })
1877 for tlb in (try_get(
1878 vpir,
1879 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
1880 list) or []):
1881 tbr = tlb.get('toggleButtonRenderer') or {}
1882 for getter, regex in [(
1883 lambda x: x['defaultText']['accessibility']['accessibilityData'],
1884 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
1885 lambda x: x['accessibility'],
1886 lambda x: x['accessibilityData']['accessibilityData'],
1887 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
1888 label = (try_get(tbr, getter, dict) or {}).get('label')
1889 if label:
1890 mobj = re.match(regex, label)
1891 if mobj:
1892 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
1893 break
1894 sbr_tooltip = try_get(
1895 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
1896 if sbr_tooltip:
1897 like_count, dislike_count = sbr_tooltip.split(' / ')
1898 info.update({
1899 'like_count': str_to_int(like_count),
1900 'dislike_count': str_to_int(dislike_count),
1901 })
1902 vsir = content.get('videoSecondaryInfoRenderer')
1903 if vsir:
1904 info['channel'] = get_text(try_get(
1905 vsir,
1906 lambda x: x['owner']['videoOwnerRenderer']['title'],
1907 compat_str))
1908 rows = try_get(
1909 vsir,
1910 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
1911 list) or []
1912 multiple_songs = False
1913 for row in rows:
1914 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
1915 multiple_songs = True
1916 break
1917 for row in rows:
1918 mrr = row.get('metadataRowRenderer') or {}
1919 mrr_title = mrr.get('title')
1920 if not mrr_title:
1921 continue
1922 mrr_title = get_text(mrr['title'])
1923 mrr_contents_text = get_text(mrr['contents'][0])
1924 if mrr_title == 'License':
1925 info['license'] = mrr_contents_text
1926 elif not multiple_songs:
1927 if mrr_title == 'Album':
1928 info['album'] = mrr_contents_text
1929 elif mrr_title == 'Artist':
1930 info['artist'] = mrr_contents_text
1931 elif mrr_title == 'Song':
1932 info['track'] = mrr_contents_text
1933
1934 fallbacks = {
1935 'channel': 'uploader',
1936 'channel_id': 'uploader_id',
1937 'channel_url': 'uploader_url',
1938 }
1939 for to, frm in fallbacks.items():
1940 if not info.get(to):
1941 info[to] = info.get(frm)
1942
1943 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
1944 v = info.get(s_k)
1945 if v:
1946 info[d_k] = v
b84071c0 1947
06167fbb 1948 # get xsrf for annotations or comments
1949 get_annotations = self._downloader.params.get('writeannotations', False)
1950 get_comments = self._downloader.params.get('getcomments', False)
1951 if get_annotations or get_comments:
29f7c58a 1952 xsrf_token = None
545cc85d 1953 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 1954 if ytcfg:
1955 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
1956 if not xsrf_token:
1957 xsrf_token = self._search_regex(
1958 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 1959 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 1960
1961 # annotations
06167fbb 1962 if get_annotations:
64b6a4e9
RA
1963 invideo_url = try_get(
1964 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
1965 if xsrf_token and invideo_url:
29f7c58a 1966 xsrf_field_name = None
1967 if ytcfg:
1968 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
1969 if not xsrf_field_name:
1970 xsrf_field_name = self._search_regex(
1971 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 1972 webpage, 'xsrf field name',
29f7c58a 1973 group='xsrf_field_name', default='session_token')
8a784c74 1974 info['annotations'] = self._download_webpage(
64b6a4e9
RA
1975 self._proto_relative_url(invideo_url),
1976 video_id, note='Downloading annotations',
1977 errnote='Unable to download video annotations', fatal=False,
1978 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 1979
06167fbb 1980 # Get comments
1981 # TODO: Refactor and move to separate function
1982 if get_comments:
1983 expected_video_comment_count = 0
1984 video_comments = []
1985
def find_value(html, key, num_chars=2, separator='"'):
    """Return the text that follows `key` in `html`, up to `separator`.

    The value starts `num_chars` characters after the end of the first
    occurrence of `key` and ends just before the next `separator`.
    Returns '' when `key` does not occur in `html`; when no `separator`
    follows the value, the remainder of `html` is returned.
    """
    key_pos = html.find(key)
    if key_pos < 0:
        # str.find() reports a missing key as -1; without this guard the
        # offset arithmetic below would slice from an unrelated position.
        return ''
    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(separator, pos_begin)
    if pos_end < 0:
        # No terminator: slicing with -1 would silently drop the last char.
        return html[pos_begin:]
    return html[pos_begin:pos_end]
1990
def search_dict(partial, key):
    """Yield every value stored under `key` anywhere inside `partial`.

    Dicts and lists are walked depth-first in iteration order; any other
    value is ignored. A value found under `key` is yielded as is and is
    not searched any further.
    """
    list_item = object()  # marks entries that come from a list, not a dict

    def entries(node):
        if isinstance(node, dict):
            return iter(node.items())
        if isinstance(node, list):
            return ((list_item, element) for element in node)
        return iter(())

    # Explicit stack of iterators; the innermost container is on top
    stack = [entries(partial)]
    while stack:
        try:
            name, value = next(stack[-1])
        except StopIteration:
            stack.pop()
            continue
        if name is not list_item and name == key:
            yield value
        else:
            stack.append(entries(value))
2003
8a784c74 2004 continuations = []
2005 if initial_data:
2006 try:
2007 ncd = next(search_dict(initial_data, 'nextContinuationData'))
2008 continuations = [ncd['continuation']]
2009 # Handle videos where comments have been disabled entirely
2010 except StopIteration:
2011 pass
06167fbb 2012
def get_continuation(continuation, session_token, replies=False):
    """Request one page of comments (or of comment replies) for `continuation`.

    Returns the parsed JSON response on HTTP 200 and None on HTTP 413;
    any other status code raises ExtractorError.
    """
    action = 'action_get_comment_replies' if replies else 'action_get_comments'
    query = {
        'pbj': 1,
        'ctoken': continuation,
        action: 1,
    }
    post_data = urlencode_postdata({
        'session_token': session_token
    })
    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20201202.06.01'
    }

    # 413 is listed as an expected status and mapped to None below
    content, handle = self._download_webpage_handle(
        'https://www.youtube.com/comment_service_ajax',
        video_id,
        note=False,
        expected_status=[413],
        data=post_data,
        query=query,
        headers=request_headers)

    status = handle.getcode()
    if status == 200:
        return self._parse_json(content, video_id)
    if status == 413:
        return None
    raise ExtractorError('Unexpected HTTP error code: %s' % status)
2047
2048 first_continuation = True
885d36d4 2049 chain_msg = ''
2050 self.to_screen('Downloading comments')
06167fbb 2051 while continuations:
885d36d4 2052 continuation = continuations.pop()
8d0ea5f9 2053 comment_response = get_continuation(continuation, xsrf_token)
06167fbb 2054 if not comment_response:
2055 continue
2056 if list(search_dict(comment_response, 'externalErrorMessage')):
2057 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2058
8d0ea5f9
B
2059 if 'continuationContents' not in comment_response['response']:
2060 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2061 continue
2062 # not sure if this actually helps
2063 if 'xsrf_token' in comment_response:
2064 xsrf_token = comment_response['xsrf_token']
2065
06167fbb 2066 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2067 if first_continuation:
2068 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2069 first_continuation = False
2070 if 'contents' not in item_section:
2071 # continuation returned no comments?
2072 # set an empty array as to not break the for loop
2073 item_section['contents'] = []
2074
2075 for meta_comment in item_section['contents']:
2076 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2077 video_comments.append({
2078 'id': comment['commentId'],
2079 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
8d0ea5f9 2080 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
06167fbb 2081 'author': comment.get('authorText', {}).get('simpleText', ''),
2082 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2083 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2084 'parent': 'root'
2085 })
2086 if 'replies' not in meta_comment['commentThreadRenderer']:
2087 continue
2088
8d0ea5f9
B
2089 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2090 while reply_continuations:
06167fbb 2091 time.sleep(1)
8d0ea5f9
B
2092 continuation = reply_continuations.pop()
2093 replies_data = get_continuation(continuation, xsrf_token, True)
06167fbb 2094 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
8d0ea5f9 2095 continue
06167fbb 2096
2097 if self._downloader.params.get('verbose', False):
885d36d4 2098 chain_msg = ' (chain %s)' % comment['commentId']
2099 self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
06167fbb 2100 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
885d36d4 2101 for reply_meta in reply_comment_meta.get('contents', {}):
06167fbb 2102 reply_comment = reply_meta['commentRenderer']
2103 video_comments.append({
2104 'id': reply_comment['commentId'],
2105 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
8d0ea5f9 2106 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
06167fbb 2107 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2108 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2109 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2110 'parent': comment['commentId']
2111 })
2112 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
8d0ea5f9 2113 continue
8d0ea5f9 2114 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
06167fbb 2115
885d36d4 2116 self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
06167fbb 2117 if 'continuations' in item_section:
8d0ea5f9 2118 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
06167fbb 2119 time.sleep(1)
2120
885d36d4 2121 self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
545cc85d 2122 info.update({
2123 'comments': video_comments,
2124 'comment_count': expected_video_comment_count
2125 })
4ea3be0a 2126
545cc85d 2127 self.mark_watched(video_id, player_response)
d77ab8e2 2128
545cc85d 2129 return info
c5e8d7af 2130
5f6a1245 2131
8bdd16b4 2132class YoutubeTabIE(YoutubeBaseInfoExtractor):
2133 IE_DESC = 'YouTube.com tab'
70d5c17b 2134 _VALID_URL = r'''(?x)
2135 https?://
2136 (?:\w+\.)?
2137 (?:
2138 youtube(?:kids)?\.com|
2139 invidio\.us
2140 )/
2141 (?:
2142 (?:channel|c|user)/|
2143 (?P<not_channel>
3d3dddc9 2144 feed/|
70d5c17b 2145 (?:playlist|watch)\?.*?\blist=
2146 )|
29f7c58a 2147 (?!(?:%s)\b) # Direct URLs
70d5c17b 2148 )
2149 (?P<id>[^/?\#&]+)
2150 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2151 IE_NAME = 'youtube:tab'
2152
81127aa5 2153 _TESTS = [{
8bdd16b4 2154 # playlists, multipage
2155 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2156 'playlist_mincount': 94,
2157 'info_dict': {
2158 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2159 'title': 'Игорь Клейнер - Playlists',
2160 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2161 'uploader': 'Игорь Клейнер',
2162 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2163 },
2164 }, {
2165 # playlists, multipage, different order
2166 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2167 'playlist_mincount': 94,
2168 'info_dict': {
2169 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2170 'title': 'Игорь Клейнер - Playlists',
2171 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2172 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2173 'uploader': 'Игорь Клейнер',
8bdd16b4 2174 },
2175 }, {
2176 # playlists, singlepage
2177 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2178 'playlist_mincount': 4,
2179 'info_dict': {
2180 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2181 'title': 'ThirstForScience - Playlists',
2182 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2183 'uploader': 'ThirstForScience',
2184 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2185 }
2186 }, {
2187 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2188 'only_matching': True,
2189 }, {
2190 # basic, single video playlist
0e30a7b9 2191 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2192 'info_dict': {
0e30a7b9 2193 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2194 'uploader': 'Sergey M.',
2195 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2196 'title': 'youtube-dl public playlist',
81127aa5 2197 },
0e30a7b9 2198 'playlist_count': 1,
9291475f 2199 }, {
8bdd16b4 2200 # empty playlist
0e30a7b9 2201 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2202 'info_dict': {
0e30a7b9 2203 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2204 'uploader': 'Sergey M.',
2205 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2206 'title': 'youtube-dl empty playlist',
9291475f
PH
2207 },
2208 'playlist_count': 0,
2209 }, {
8bdd16b4 2210 # Home tab
2211 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2212 'info_dict': {
8bdd16b4 2213 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2214 'title': 'lex will - Home',
2215 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2216 'uploader': 'lex will',
2217 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2218 },
8bdd16b4 2219 'playlist_mincount': 2,
9291475f 2220 }, {
8bdd16b4 2221 # Videos tab
2222 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2223 'info_dict': {
8bdd16b4 2224 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2225 'title': 'lex will - Videos',
2226 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2227 'uploader': 'lex will',
2228 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2229 },
8bdd16b4 2230 'playlist_mincount': 975,
9291475f 2231 }, {
8bdd16b4 2232 # Videos tab, sorted by popular
2233 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2234 'info_dict': {
8bdd16b4 2235 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2236 'title': 'lex will - Videos',
2237 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2238 'uploader': 'lex will',
2239 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2240 },
8bdd16b4 2241 'playlist_mincount': 199,
9291475f 2242 }, {
8bdd16b4 2243 # Playlists tab
2244 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2245 'info_dict': {
8bdd16b4 2246 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2247 'title': 'lex will - Playlists',
2248 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2249 'uploader': 'lex will',
2250 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2251 },
8bdd16b4 2252 'playlist_mincount': 17,
ac7553d0 2253 }, {
8bdd16b4 2254 # Community tab
2255 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2256 'info_dict': {
8bdd16b4 2257 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2258 'title': 'lex will - Community',
2259 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2260 'uploader': 'lex will',
2261 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2262 },
2263 'playlist_mincount': 18,
87dadd45 2264 }, {
8bdd16b4 2265 # Channels tab
2266 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2267 'info_dict': {
8bdd16b4 2268 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2269 'title': 'lex will - Channels',
2270 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2271 'uploader': 'lex will',
2272 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2273 },
deaec5af 2274 'playlist_mincount': 12,
6b08cdf6 2275 }, {
a0566bbf 2276 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2277 'only_matching': True,
2278 }, {
a0566bbf 2279 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2280 'only_matching': True,
2281 }, {
a0566bbf 2282 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2283 'only_matching': True,
2284 }, {
2285 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2286 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2287 'info_dict': {
2288 'title': '29C3: Not my department',
2289 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2290 'uploader': 'Christiaan008',
2291 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2292 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2293 },
2294 'playlist_count': 96,
2295 }, {
2296 'note': 'Large playlist',
2297 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2298 'info_dict': {
8bdd16b4 2299 'title': 'Uploads from Cauchemar',
2300 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2301 'uploader': 'Cauchemar',
2302 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2303 },
8bdd16b4 2304 'playlist_mincount': 1123,
2305 }, {
2306 # even larger playlist, 8832 videos
2307 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2308 'only_matching': True,
4b7df0d3
JMF
2309 }, {
2310 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2311 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2312 'info_dict': {
acf757f4
PH
2313 'title': 'Uploads from Interstellar Movie',
2314 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2315 'uploader': 'Interstellar Movie',
8bdd16b4 2316 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2317 },
481cc733 2318 'playlist_mincount': 21,
8bdd16b4 2319 }, {
2320 # https://github.com/ytdl-org/youtube-dl/issues/21844
2321 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2322 'info_dict': {
2323 'title': 'Data Analysis with Dr Mike Pound',
2324 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2325 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2326 'uploader': 'Computerphile',
deaec5af 2327 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2328 },
2329 'playlist_mincount': 11,
2330 }, {
a0566bbf 2331 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2332 'only_matching': True,
dacb3a86
S
2333 }, {
2334 # Playlist URL that does not actually serve a playlist
2335 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2336 'info_dict': {
2337 'id': 'FqZTN594JQw',
2338 'ext': 'webm',
2339 'title': "Smiley's People 01 detective, Adventure Series, Action",
2340 'uploader': 'STREEM',
2341 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2342 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2343 'upload_date': '20150526',
2344 'license': 'Standard YouTube License',
2345 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2346 'categories': ['People & Blogs'],
2347 'tags': list,
dbdaaa23 2348 'view_count': int,
dacb3a86
S
2349 'like_count': int,
2350 'dislike_count': int,
2351 },
2352 'params': {
2353 'skip_download': True,
2354 },
13a75688 2355 'skip': 'This video is not available.',
dacb3a86 2356 'add_ie': [YoutubeIE.ie_key()],
481cc733 2357 }, {
8bdd16b4 2358 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2359 'only_matching': True,
66b48727 2360 }, {
8bdd16b4 2361 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2362 'only_matching': True,
a0566bbf 2363 }, {
2364 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2365 'info_dict': {
2366 'id': '9Auq9mYxFEE',
2367 'ext': 'mp4',
deaec5af 2368 'title': compat_str,
a0566bbf 2369 'uploader': 'Sky News',
2370 'uploader_id': 'skynews',
2371 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2372 'upload_date': '20191102',
deaec5af 2373 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2374 'categories': ['News & Politics'],
2375 'tags': list,
2376 'like_count': int,
2377 'dislike_count': int,
2378 },
2379 'params': {
2380 'skip_download': True,
2381 },
2382 }, {
2383 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2384 'info_dict': {
2385 'id': 'a48o2S1cPoo',
2386 'ext': 'mp4',
2387 'title': 'The Young Turks - Live Main Show',
2388 'uploader': 'The Young Turks',
2389 'uploader_id': 'TheYoungTurks',
2390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2391 'upload_date': '20150715',
2392 'license': 'Standard YouTube License',
2393 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2394 'categories': ['News & Politics'],
2395 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2396 'like_count': int,
2397 'dislike_count': int,
2398 },
2399 'params': {
2400 'skip_download': True,
2401 },
2402 'only_matching': True,
2403 }, {
2404 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2405 'only_matching': True,
2406 }, {
2407 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2408 'only_matching': True,
3d3dddc9 2409 }, {
2410 'url': 'https://www.youtube.com/feed/trending',
2411 'only_matching': True,
2412 }, {
2413 # needs auth
2414 'url': 'https://www.youtube.com/feed/library',
2415 'only_matching': True,
2416 }, {
2417 # needs auth
2418 'url': 'https://www.youtube.com/feed/history',
2419 'only_matching': True,
2420 }, {
2421 # needs auth
2422 'url': 'https://www.youtube.com/feed/subscriptions',
2423 'only_matching': True,
2424 }, {
2425 # needs auth
2426 'url': 'https://www.youtube.com/feed/watch_later',
2427 'only_matching': True,
2428 }, {
2429 # no longer available?
2430 'url': 'https://www.youtube.com/feed/recommended',
2431 'only_matching': True,
29f7c58a 2432 }, {
2433 # inline playlist with not always working continuations
2434 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2435 'only_matching': True,
2436 }, {
2437 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2438 'only_matching': True,
2439 }, {
2440 'url': 'https://www.youtube.com/course',
2441 'only_matching': True,
2442 }, {
2443 'url': 'https://www.youtube.com/zsecurity',
2444 'only_matching': True,
2445 }, {
2446 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2447 'only_matching': True,
2448 }, {
2449 'url': 'https://www.youtube.com/TheYoungTurks/live',
2450 'only_matching': True,
2451 }]
2452
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Anything ``YoutubeIE`` accepts is left to it; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2457
2458 def _extract_channel_id(self, webpage):
2459 channel_id = self._html_search_meta(
2460 'channelId', webpage, 'channel id', default=None)
2461 if channel_id:
2462 return channel_id
2463 channel_url = self._html_search_meta(
2464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2466 'twitter:app:url:googleplay'), webpage, 'channel url')
2467 return self._search_regex(
2468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2469 channel_url, 'channel id')
15f6397c 2470
8bdd16b4 2471 @staticmethod
2472 def _extract_grid_item_renderer(item):
2473 for item_kind in ('Playlist', 'Video', 'Channel'):
2474 renderer = item.get('grid%sRenderer' % item_kind)
2475 if renderer:
2476 return renderer
2477
def _grid_entries(self, grid_renderer):
    """Yield one result per playlist, video or channel in a grid renderer.

    Items that are not dicts, or that carry no recognised grid renderer,
    are skipped.  A renderer is checked for a playlist ID, a video ID and
    a channel ID in turn, and an entry is yielded for each one present.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_grid_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        runs_title = try_get(
            renderer, lambda x: x['title']['runs'][0]['text'], compat_str)

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=playlist_id, video_title=runs_title)

        # video
        if renderer.get('videoId'):
            yield self._extract_video(renderer)

        # channel (its title is stored as ``simpleText`` rather than ``runs``)
        channel_id = renderer.get('channelId')
        if channel_id:
            simple_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % channel_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(), video_title=simple_title)
2506
3d3dddc9 2507 def _shelf_entries_from_content(self, shelf_renderer):
2508 content = shelf_renderer.get('content')
2509 if not isinstance(content, dict):
8bdd16b4 2510 return
3d3dddc9 2511 renderer = content.get('gridRenderer')
2512 if renderer:
2513 # TODO: add support for nested playlists so each shelf is processed
2514 # as separate playlist
2515 # TODO: this includes only first N items
2516 for entry in self._grid_entries(renderer):
2517 yield entry
2518 renderer = content.get('horizontalListRenderer')
2519 if renderer:
2520 # TODO
2521 pass
8bdd16b4 2522
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then inline content.

    When ``skip_channels`` is true and the shelf URL contains
    ``/channels?``, nothing at all is yielded for the shelf.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels.  Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work.
        is_channels_link = '/channels?' in shelf_url
        if skip_channels and is_channels_link:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL at all, so its content is always walked too
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2540
8bdd16b4 2541 def _playlist_entries(self, video_list_renderer):
2542 for content in video_list_renderer['contents']:
2543 if not isinstance(content, dict):
2544 continue
2545 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2546 if not isinstance(renderer, dict):
2547 continue
2548 video_id = renderer.get('videoId')
2549 if not video_id:
2550 continue
2551 yield self._extract_video(renderer)
07aeced6 2552
3d3dddc9 2553 r""" # Not needed in the new implementation
3462ffa8 2554 def _itemSection_entries(self, item_sect_renderer):
2555 for content in item_sect_renderer['contents']:
2556 if not isinstance(content, dict):
2557 continue
2558 renderer = content.get('videoRenderer', {})
2559 if not isinstance(renderer, dict):
2560 continue
2561 video_id = renderer.get('videoId')
2562 if not video_id:
2563 continue
2564 yield self._extract_video(renderer)
3d3dddc9 2565 """
3462ffa8 2566
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped by a rich item renderer, if it has one."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2574
8bdd16b4 2575 def _video_entry(self, video_renderer):
2576 video_id = video_renderer.get('videoId')
2577 if video_id:
2578 return self._extract_video(video_renderer)
dacb3a86 2579
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community (backstage) post.

    The attached video, if any, is yielded first, followed by every
    YouTube video linked inline from the post text.  An inline link that
    points to the attached video is not yielded a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    # Remember the attachment's ID so that inline links to the same video
    # can be recognised as duplicates below.
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Tag the result with the ID of the linked video itself
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2608
8bdd16b4 2609 def _post_thread_continuation_entries(self, post_thread_continuation):
2610 contents = post_thread_continuation.get('contents')
2611 if not isinstance(contents, list):
2612 return
2613 for content in contents:
2614 renderer = content.get('backstagePostThreadRenderer')
2615 if not isinstance(renderer, dict):
2616 continue
2617 for entry in self._post_thread_entries(renderer):
2618 yield entry
07aeced6 2619
29f7c58a 2620 @staticmethod
2621 def _build_continuation_query(continuation, ctp=None):
2622 query = {
2623 'ctoken': continuation,
2624 'continuation': continuation,
2625 }
2626 if ctp:
2627 query['itct'] = ctp
2628 return query
2629
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query from ``nextContinuationData``.

    Only the first element of ``continuations`` is inspected.  ``None`` is
    returned when it is missing or carries no continuation token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
    return None
c5e8d7af 2641
@classmethod
def _extract_continuation(cls, renderer):
    """Return the query for the page following ``renderer``, or ``None``.

    ``nextContinuationData`` is tried first.  Failing that, the renderer's
    ``contents`` and ``items`` lists are scanned (in that order) for the
    first ``continuationItemRenderer`` that carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    children = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 2664
def _entries(self, tab, identity_token):
    """Yield every entry of ``tab``, following continuation pages.

    The entries embedded in the tab are yielded first.  After that,
    continuation pages are requested from ``browse_ajax`` for as long as a
    continuation query can be found in the previous page.

    ``identity_token``, when truthy, is sent as the
    ``x-youtube-identity-token`` header of every continuation request.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks the contents of a section list / rich grid renderer, yields
        # their entries and records the next continuation query in
        # ``continuation_list[0]`` of the enclosing scope.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: the only other shape handled here is
                # a rich item
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Dispatch table: renderer key -> callable producing entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of an item is processed
                    break

            # Fall back to a continuation stored on the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation stored on the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token

    for page_num in itertools.count(1):
        if not continuation:
            break
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                browse = self._download_json(
                    'https://www.youtube.com/browse_ajax', None,
                    'Downloading page %d%s'
                    % (page_num, ' (retry #%d)' % count if count else ''),
                    headers=headers, query=continuation)
                break
            except ExtractorError as e:
                # Only HTTP 500/503 are retried; anything else, or running
                # out of retries, re-raises the error
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not browse:
            break
        response = try_get(browse, lambda x: x[1]['response'], dict)
        if not response:
            break

        # First response shape: entries delivered under ``continuationContents``
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell, so that a value left over from an earlier page is
            # not mistaken for this page's continuation
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: entries delivered as appended continuation
        # items.  Each value pairs the handler with the key under which the
        # handler expects to find the item list.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (self._playlist_entries, 'contents'),
        }
        continuation_items = try_get(
            response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
        # The kind of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list so that it looks like a regular renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
9558dcec 2794
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the tab flagged as selected.

    Raises ``ExtractorError`` when no tab in ``tabs`` is selected.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 2802
@staticmethod
def _extract_uploader(data):
    """Collect uploader details from the playlist sidebar of ``data``.

    Returns a dict holding whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be determined; keys whose value is ``None`` are
    left out.  When several sidebar items name an owner, the last one wins.
    """
    uploader = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        uploader['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict(
        (field, value) for field, value in uploader.items() if value is not None)
8bdd16b4 2825
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present and from
    ``playlistMetadataRenderer`` otherwise.  When no channel ID is known,
    uploader details are taken from the playlist sidebar instead.  The
    entries are those of the currently selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    channel_meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_meta:
        channel_name = channel_meta.get('title')
        channel_url = channel_meta.get('channelUrl')
        channel_id = channel_meta.get('externalId')

    playlist_id = title = description = None
    tags = []
    thumbnails_list = []
    renderer = channel_meta or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        avatar_thumbnails = try_get(
            renderer, lambda x: x['avatar']['thumbnails'], list)
        thumbnails_list = avatar_thumbnails or try_get(
            data,
            lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
            list) or []

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if not thumb_url:
            continue
        thumbnails.append({
            'url': thumb_url,
            'width': int_or_none(thumb.get('width')),
            'height': int_or_none(thumb.get('height')),
        })

    # Fall back to the ID matched from the URL, and to the ID as the title
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader fields were settled on
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']
    return self.playlist_result(
        self._entries(selected_tab, identity_token), **metadata)
73c4ac2c 2892
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Build the result for a playlist rendered inline on a watch page.

    When the playlist advertises a URL of its own that differs from
    ``url``, extraction is delegated to that URL; otherwise the inline
    entries are returned directly.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id
    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 2910
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` pairs for the alerts in ``data``.

    Every renderer found in each element of ``data['alerts']`` is
    inspected.  Its ``simpleText`` message, if any, is yielded first,
    followed by one pair per text run.  Elements and renderers that are
    not dicts, and renderers without a ``type``, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed renderer must not abort the whole extraction
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if message:
                yield alert_type, message
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                message = try_get(run, lambda x: x['text'], compat_str)
                if message:
                    yield alert_type, message
2928
def _extract_identity_token(self, webpage, item_id):
    """Return the ``ID_TOKEN`` value found in ``webpage``, or ``None``.

    The parsed ytcfg is consulted first; a regex search over the raw page
    is the fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
2938
def _real_extract(self, url):
    """Extract a channel tab, playlist or feed page, or hand off to a video.

    The URL is first normalised to the ``www.youtube.com`` host.  The
    downloaded page is then dispatched on the shape of its initial data:
    a tabbed browse page, an inline watch-page playlist, or, as a last
    resort, a single video.

    Raises ``ExtractorError`` when YouTube reports an error alert or when
    the page cannot be recognised.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    # Matches only when nothing but a query string or fragment follows the
    # part of the URL covered by _VALID_URL
    is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

    # Handle both video/playlist URLs
    qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    # A watch URL without a video ID is only usable if it names a playlist
    if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
        if playlist_id:
            self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
        else:
            raise ExtractorError('Unable to recognize tab page')
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    identity_token = self._extract_identity_token(webpage, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)
    # Error alerts: every one but the last is reported as a warning and the
    # last one is raised; alerts of any other type are only warned about
    err_msg = None
    for alert_type, alert_message in self._extract_alerts(data):
        if alert_type.lower() == 'error':
            if err_msg:
                self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
            err_msg = alert_message
        else:
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if err_msg:
        raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)
    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
    # Failed to recognize
    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 2998
c5e8d7af 2999
class YoutubePlaylistIE(InfoExtractor):
    """Resolve bare playlist IDs and playlist-bearing URLs.

    No extraction happens here: the playlist is redirected to its
    ``/playlist`` page, which ``YoutubeTabIE`` handles.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when ``YoutubeTabIE`` does not claim it."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the ``/playlist`` page, keeping the original query.

        A URL without any query (e.g. a bare playlist ID) gets a
        ``list=<id>`` query built for it.
        """
        playlist_id = self._match_id(url)
        original_query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        query = original_query or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3074
3075
class YoutubeYtBeIE(InfoExtractor):
    """Handle ``youtu.be`` short links that also carry a playlist ID.

    The link is rewritten to the equivalent ``/watch`` URL and passed on
    to ``YoutubeTabIE``.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a ``/watch?v=...&list=...`` URL."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3114
3115
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's channel page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand ``/user/<name>`` over to ``YoutubeTabIE``."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3129
b05654f0 3130
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ``:ytfav`` shorthand to the liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the ``LL`` playlist URL over to ``YoutubeTabIE``."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3148
3149
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor that pages through the ``youtubei/v1/search`` endpoint."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Optional value for the request's 'params' field; subclasses override it.
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for *query*, stopping after *n* results.

        Pages are requested one after another. Iteration ends when a page
        cannot be downloaded, contains no section list, provides no
        continuation token, or once *n* videos have been yielded.
        """
        payload = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                },
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        # Two response layouts are probed, in this order: the two-column
        # search results renderer and the appended continuation items.
        section_paths = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
        )

        yielded = 0
        for page_num in itertools.count(1):
            response = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(payload).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not response:
                return

            sections = try_get(response, section_paths, list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches, which
            # shifts the position of both the videos and the token, so every
            # section is inspected instead of relying on fixed indices.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer carrying an ID.
                    if not (isinstance(renderer, dict) and renderer.get('videoId')):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3230
c9ae7b95 3231
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor registered under ``ytsearchdate``."""

    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request by the parent class.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3237
c9ae7b95 3238
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle ``youtube.com/results?...`` URLs by re-running the search."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the optional ``sp`` value from *url*, then search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Falls back to an empty string when the URL has no 'sp' parameter.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3264
3265
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute; it is used for
    both IE_NAME and the feed URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Delegate ``/feed/<_FEED_NAME>`` to ``YoutubeTabIE``."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3286
3287
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to ``YoutubeTabIE``."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3300
3301
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the :ytrec keywords.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3316
1ed5b5c9 3317
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3329
1ed5b5c9 3330
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3339
3340
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that carry no video ID and explain the likely cause.

    The pattern only matches URLs ending right after ``watch?``, a single
    non-video parameter, or an ``attribution_link``. Extraction always fails
    with an error telling the user to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` with quoting advice."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(advice, expected=True)
772fd5cc
PH
3388
3389
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the short ID."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)